diff options
Diffstat (limited to 'fs')
133 files changed, 1255 insertions, 877 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 3062cceb5c2a..782d4d05a53b 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -350,7 +350,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_server *server; - struct uuid_v1 *r; + struct afs_uuid *r; unsigned loop; __be32 *b; int ret; @@ -380,7 +380,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) } _debug("unmarshall UUID"); - call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); if (!call->request) return -ENOMEM; @@ -453,7 +453,7 @@ static int afs_deliver_cb_probe(struct afs_call *call) static void SRXAFSCB_ProbeUuid(struct work_struct *work) { struct afs_call *call = container_of(work, struct afs_call, work); - struct uuid_v1 *r = call->request; + struct afs_uuid *r = call->request; struct { __be32 match; @@ -476,7 +476,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) */ static int afs_deliver_cb_probe_uuid(struct afs_call *call) { - struct uuid_v1 *r; + struct afs_uuid *r; unsigned loop; __be32 *b; int ret; @@ -502,15 +502,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) } _debug("unmarshall UUID"); - call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); if (!call->request) return -ENOMEM; b = call->buffer; r = call->request; - r->time_low = b[0]; - r->time_mid = htons(ntohl(b[1])); - r->time_hi_and_version = htons(ntohl(b[2])); + r->time_low = ntohl(b[0]); + r->time_mid = ntohl(b[1]); + r->time_hi_and_version = ntohl(b[2]); r->clock_seq_hi_and_reserved = ntohl(b[3]); r->clock_seq_low = ntohl(b[4]); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 393672997cc2..4e2556606623 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -410,6 +410,15 @@ struct afs_interface { unsigned mtu; /* MTU of interface */ }; +struct afs_uuid { + __be32 time_low; /* low part of timestamp */ + __be16 time_mid; /* mid part of timestamp */ + __be16 time_hi_and_version; /* high part of timestamp and version */ + __u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ + __u8 clock_seq_low; /* clock seq low */ + __u8 node[6]; /* spatially unique node ID (MAC addr) */ +}; + /*****************************************************************************/ /* * cache.c @@ -544,7 +553,7 @@ extern int afs_drop_inode(struct inode *); * main.c */ extern struct workqueue_struct *afs_wq; -extern struct uuid_v1 afs_uuid; +extern struct afs_uuid afs_uuid; /* * misc.c diff --git a/fs/afs/main.c b/fs/afs/main.c index 51d7d17bca57..9944770849da 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -31,7 +31,7 @@ static char *rootcell; module_param(rootcell, charp, 0); MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); -struct uuid_v1 afs_uuid; +struct afs_uuid afs_uuid; struct workqueue_struct *afs_wq; /* @@ -1541,7 +1541,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, ssize_t ret; /* enforce forwards compatibility on users */ - if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) { + if (unlikely(iocb->aio_reserved2)) { pr_debug("EINVAL: reserve field set\n"); return -EINVAL; } @@ -1568,6 +1568,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->common.ki_pos = iocb->aio_offset; req->common.ki_complete = aio_complete; req->common.ki_flags = iocb_flags(req->common.ki_filp); + req->common.ki_hint = file_write_hint(file); if (iocb->aio_flags & IOCB_FLAG_RESFD) { /* @@ -1586,6 +1587,18 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->common.ki_flags |= IOCB_EVENTFD; } + ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags); + if (unlikely(ret)) { + pr_debug("EINVAL: aio_rw_flags\n"); + goto out_put_req; + } + + if ((req->common.ki_flags & IOCB_NOWAIT) && + !(req->common.ki_flags & IOCB_DIRECT)) { + ret = -EOPNOTSUPP; + goto out_put_req; + } + ret = put_user(KIOCB_KEY, &user_iocb->aio_key); if (unlikely(ret)) { pr_debug("EFAULT: aio_key\n"); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index beef981aa54f..974f5346458a 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -83,7 +83,7 @@ struct autofs_info { struct autofs_wait_queue { wait_queue_head_t queue; struct autofs_wait_queue *next; - autofs_wqt_t wait_queue_token; + autofs_wqt_t wait_queue_entry_token; /* We use the following to see what we are waiting for */ struct qstr name; u32 dev; diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 734cbf8d9676..dd9f1bebb5a3 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -344,7 +344,7 @@ static int autofs_dev_ioctl_fail(struct file *fp, int status; token = (autofs_wqt_t) param->fail.token; - status = param->fail.status ? param->fail.status : -ENOENT; + status = param->fail.status < 0 ? param->fail.status : -ENOENT; return autofs4_wait_release(sbi, token, status); } diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 24a58bf9ca72..7071895b0678 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, size_t pktsz; pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n", - (unsigned long) wq->wait_queue_token, + (unsigned long) wq->wait_queue_entry_token, wq->name.len, wq->name.name, type); memset(&pkt, 0, sizeof(pkt)); /* For security reasons */ @@ -120,7 +120,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pktsz = sizeof(*mp); - mp->wait_queue_token = wq->wait_queue_token; + mp->wait_queue_entry_token = wq->wait_queue_entry_token; mp->len = wq->name.len; memcpy(mp->name, wq->name.name, wq->name.len); mp->name[wq->name.len] = '\0'; @@ -133,7 +133,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pktsz = sizeof(*ep); - ep->wait_queue_token = wq->wait_queue_token; + ep->wait_queue_entry_token = wq->wait_queue_entry_token; ep->len = wq->name.len; memcpy(ep->name, wq->name.name, wq->name.len); ep->name[wq->name.len] = '\0'; @@ -153,7 +153,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pktsz = sizeof(*packet); - packet->wait_queue_token = wq->wait_queue_token; + packet->wait_queue_entry_token = wq->wait_queue_entry_token; packet->len = wq->name.len; memcpy(packet->name, wq->name.name, wq->name.len); packet->name[wq->name.len] = '\0'; @@ -428,7 +428,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, return -ENOMEM; } - wq->wait_queue_token = autofs4_next_wait_queue; + wq->wait_queue_entry_token = autofs4_next_wait_queue; if (++autofs4_next_wait_queue == 0) autofs4_next_wait_queue = 1; wq->next = sbi->queues; @@ -461,7 +461,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, } pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", - (unsigned long) wq->wait_queue_token, wq->name.len, + (unsigned long) wq->wait_queue_entry_token, wq->name.len, wq->name.name, notify); /* @@ -471,7 +471,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, } else { wq->wait_ctr++; pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n", - (unsigned long) wq->wait_queue_token, wq->name.len, + (unsigned long) wq->wait_queue_entry_token, wq->name.len, wq->name.name, notify); mutex_unlock(&sbi->wq_mutex); kfree(qstr.name); @@ -550,13 +550,13 @@ int autofs4_wait(struct autofs_sb_info *sbi, } -int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status) +int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_entry_token, int status) { struct autofs_wait_queue *wq, **wql; mutex_lock(&sbi->wq_mutex); for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) { - if (wq->wait_queue_token == wait_queue_token) + if (wq->wait_queue_entry_token == wait_queue_entry_token) break; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 519599dddd36..a7df151f8aba 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -225,6 +225,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, bio_init(&bio, vecs, nr_pages); bio.bi_bdev = bdev; bio.bi_iter.bi_sector = pos >> 9; + bio.bi_write_hint = iocb->ki_hint; bio.bi_private = current; bio.bi_end_io = blkdev_bio_end_io_simple; @@ -262,8 +263,11 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, if (vecs != inline_vecs) kfree(vecs); - if (unlikely(bio.bi_error)) - return bio.bi_error; + if (unlikely(bio.bi_status)) + ret = blk_status_to_errno(bio.bi_status); + + bio_uninit(&bio); + return ret; } @@ -288,16 +292,18 @@ static void blkdev_bio_end_io(struct bio *bio) bool should_dirty = dio->should_dirty; if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) { - if (bio->bi_error && !dio->bio.bi_error) - dio->bio.bi_error = bio->bi_error; + if (bio->bi_status && !dio->bio.bi_status) + dio->bio.bi_status = bio->bi_status; } else { if (!dio->is_sync) { struct kiocb *iocb = dio->iocb; - ssize_t ret = dio->bio.bi_error; + ssize_t ret; - if (likely(!ret)) { + if (likely(!dio->bio.bi_status)) { ret = dio->size; iocb->ki_pos += ret; + } else { + ret = blk_status_to_errno(dio->bio.bi_status); } dio->iocb->ki_complete(iocb, ret, 0); @@ -334,7 +340,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) bool is_read = (iov_iter_rw(iter) == READ), is_sync; loff_t pos = iocb->ki_pos; blk_qc_t qc = BLK_QC_T_NONE; - int ret; + int ret = 0; if ((pos | iov_iter_alignment(iter)) & (bdev_logical_block_size(bdev) - 1)) @@ -358,12 +364,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) for (;;) { bio->bi_bdev = bdev; bio->bi_iter.bi_sector = pos >> 9; + bio->bi_write_hint = iocb->ki_hint; bio->bi_private = dio; bio->bi_end_io = blkdev_bio_end_io; ret = bio_iov_iter_get_pages(bio, iter); if (unlikely(ret)) { - bio->bi_error = ret; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); break; } @@ -412,7 +419,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) } __set_current_state(TASK_RUNNING); - ret = dio->bio.bi_error; + if (!ret) + ret = blk_status_to_errno(dio->bio.bi_status); if (likely(!ret)) ret = dio->size; @@ -436,7 +444,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) static __init int blkdev_init(void) { - blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio)); + blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS); if (!blkdev_dio_pool) return -ENOMEM; return 0; diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index b8622e4d1744..d87ac27a5f2b 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -310,7 +310,8 @@ struct btrfs_dio_private { * The original bio may be split to several sub-bios, this is * done during endio of sub-bios */ - int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int); + blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *, + blk_status_t); }; /* diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index ab14c2e635ca..4ded1c3f92b8 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -2129,7 +2129,7 @@ static void btrfsic_bio_end_io(struct bio *bp) /* mutex is not held! This is not save if IO is not yet completed * on umount */ iodone_w_error = 0; - if (bp->bi_error) + if (bp->bi_status) iodone_w_error = 1; BUG_ON(NULL == block); @@ -2143,7 +2143,7 @@ static void btrfsic_bio_end_io(struct bio *bp) if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", - bp->bi_error, + bp->bi_status, btrfsic_get_block_type(dev_state->state, block), block->logical_bytenr, dev_state->name, block->dev_bytenr, block->mirror_num); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 10e6b282d09d..a2fad39f79ba 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -155,7 +155,7 @@ static void end_compressed_bio_read(struct bio *bio) unsigned long index; int ret; - if (bio->bi_error) + if (bio->bi_status) cb->errors = 1; /* if there are more bios still pending for this compressed @@ -268,7 +268,7 @@ static void end_compressed_bio_write(struct bio *bio) struct page *page; unsigned long index; - if (bio->bi_error) + if (bio->bi_status) cb->errors = 1; /* if there are more bios still pending for this compressed @@ -287,7 +287,7 @@ static void end_compressed_bio_write(struct bio *bio) cb->start, cb->start + cb->len - 1, NULL, - bio->bi_error ? 0 : 1); + bio->bi_status ? 0 : 1); cb->compressed_pages[0]->mapping = NULL; end_compressed_writeback(inode, cb); @@ -320,7 +320,7 @@ out: * This also checksums the file bytes and gets things ready for * the end io hooks. */ -int btrfs_submit_compressed_write(struct inode *inode, u64 start, +blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long len, u64 disk_start, unsigned long compressed_len, struct page **compressed_pages, @@ -335,13 +335,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, struct page *page; u64 first_byte = disk_start; struct block_device *bdev; - int ret; + blk_status_t ret; int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; WARN_ON(start & ((u64)PAGE_SIZE - 1)); cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); if (!cb) - return -ENOMEM; + return BLK_STS_RESOURCE; refcount_set(&cb->pending_bios, 0); cb->errors = 0; cb->inode = inode; @@ -358,7 +358,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); if (!bio) { kfree(cb); - return -ENOMEM; + return BLK_STS_RESOURCE; } bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_private = cb; @@ -368,17 +368,17 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, /* create and submit bios for the compressed pages */ bytes_left = compressed_len; for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) { + int submit = 0; + page = compressed_pages[pg_index]; page->mapping = inode->i_mapping; if (bio->bi_iter.bi_size) - ret = io_tree->ops->merge_bio_hook(page, 0, + submit = io_tree->ops->merge_bio_hook(page, 0, PAGE_SIZE, bio, 0); - else - ret = 0; page->mapping = NULL; - if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) < + if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_get(bio); @@ -400,7 +400,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ret = btrfs_map_bio(fs_info, bio, 0, 1); if (ret) { - bio->bi_error = ret; + bio->bi_status = ret; bio_endio(bio); } @@ -434,7 +434,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ret = btrfs_map_bio(fs_info, bio, 0, 1); if (ret) { - bio->bi_error = ret; + bio->bi_status = ret; bio_endio(bio); } @@ -569,7 +569,7 @@ next: * After the compressed pages are read, we copy the bytes into the * bio we were passed and then call the bio end_io calls */ -int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, +blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -586,7 +586,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, u64 em_len; u64 em_start; struct extent_map *em; - int ret = -ENOMEM; + blk_status_t ret = BLK_STS_RESOURCE; int faili = 0; u32 *sums; @@ -600,7 +600,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, PAGE_SIZE); read_unlock(&em_tree->lock); if (!em) - return -EIO; + return BLK_STS_IOERR; compressed_len = em->block_len; cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); @@ -638,7 +638,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, __GFP_HIGHMEM); if (!cb->compressed_pages[pg_index]) { faili = pg_index - 1; - ret = -ENOMEM; + ret = BLK_STS_RESOURCE; goto fail2; } } @@ -659,19 +659,19 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, refcount_set(&cb->pending_bios, 1); for (pg_index = 0; pg_index < nr_pages; pg_index++) { + int submit = 0; + page = cb->compressed_pages[pg_index]; page->mapping = inode->i_mapping; page->index = em_start >> PAGE_SHIFT; if (comp_bio->bi_iter.bi_size) - ret = tree->ops->merge_bio_hook(page, 0, + submit = tree->ops->merge_bio_hook(page, 0, PAGE_SIZE, comp_bio, 0); - else - ret = 0; page->mapping = NULL; - if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < + if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_get(comp_bio); @@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0); if (ret) { - comp_bio->bi_error = ret; + comp_bio->bi_status = ret; bio_endio(comp_bio); } @@ -726,7 +726,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0); if (ret) { - comp_bio->bi_error = ret; + comp_bio->bi_status = ret; bio_endio(comp_bio); } diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 39ec43ab8df1..680d4265d601 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -48,12 +48,12 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, unsigned long total_out, u64 disk_start, struct bio *bio); -int btrfs_submit_compressed_write(struct inode *inode, u64 start, +blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long len, u64 disk_start, unsigned long compressed_len, struct page **compressed_pages, unsigned long nr_pages); -int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, +blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags); enum btrfs_compression_type { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 643c70d2b2e6..a0d0c79d95ed 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2563,7 +2563,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes); static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info, unsigned num_items) { - return fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items; + return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items; } /* @@ -2573,7 +2573,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info, static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info, unsigned num_items) { - return fs_info->nodesize * BTRFS_MAX_LEVEL * num_items; + return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items; } int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, @@ -3078,8 +3078,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, struct btrfs_dio_private; int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 len); -int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst); -int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, +blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst); +blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 logical_offset); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -3094,7 +3094,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_ordered_sum *sums); -int btrfs_csum_one_bio(struct inode *inode, struct bio *bio, +blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, u64 file_start, int contig); int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct list_head *list, int search_commit); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 60a750678a82..c24d615e3d7f 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -468,7 +468,7 @@ int verify_dir_item(struct btrfs_fs_info *fs_info, if (btrfs_dir_name_len(leaf, dir_item) > namelen) { btrfs_crit(fs_info, "invalid dir item name len: %u", - (unsigned)btrfs_dir_data_len(leaf, dir_item)); + (unsigned)btrfs_dir_name_len(leaf, dir_item)); return 1; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8685d67185d0..6036d15b47b8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -87,7 +87,7 @@ struct btrfs_end_io_wq { bio_end_io_t *end_io; void *private; struct btrfs_fs_info *info; - int error; + blk_status_t status; enum btrfs_wq_endio_type metadata; struct list_head list; struct btrfs_work work; @@ -131,7 +131,7 @@ struct async_submit_bio { */ u64 bio_offset; struct btrfs_work work; - int error; + blk_status_t status; }; /* @@ -799,7 +799,7 @@ static void end_workqueue_bio(struct bio *bio) btrfs_work_func_t func; fs_info = end_io_wq->info; - end_io_wq->error = bio->bi_error; + end_io_wq->status = bio->bi_status; if (bio_op(bio) == REQ_OP_WRITE) { if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { @@ -836,19 +836,19 @@ static void end_workqueue_bio(struct bio *bio) btrfs_queue_work(wq, &end_io_wq->work); } -int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, +blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata) { struct btrfs_end_io_wq *end_io_wq; end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS); if (!end_io_wq) - return -ENOMEM; + return BLK_STS_RESOURCE; end_io_wq->private = bio->bi_private; end_io_wq->end_io = bio->bi_end_io; end_io_wq->info = info; - end_io_wq->error = 0; + end_io_wq->status = 0; end_io_wq->bio = bio; end_io_wq->metadata = metadata; @@ -868,14 +868,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) static void run_one_async_start(struct btrfs_work *work) { struct async_submit_bio *async; - int ret; + blk_status_t ret; async = container_of(work, struct async_submit_bio, work); ret = async->submit_bio_start(async->inode, async->bio, async->mirror_num, async->bio_flags, async->bio_offset); if (ret) - async->error = ret; + async->status = ret; } static void run_one_async_done(struct btrfs_work *work) @@ -898,8 +898,8 @@ static void run_one_async_done(struct btrfs_work *work) wake_up(&fs_info->async_submit_wait); /* If an error occurred we just want to clean up the bio and move on */ - if (async->error) { - async->bio->bi_error = async->error; + if (async->status) { + async->bio->bi_status = async->status; bio_endio(async->bio); return; } @@ -916,18 +916,17 @@ static void run_one_async_free(struct btrfs_work *work) kfree(async); } -int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, - struct bio *bio, int mirror_num, - unsigned long bio_flags, - u64 bio_offset, - extent_submit_bio_hook_t *submit_bio_start, - extent_submit_bio_hook_t *submit_bio_done) +blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, + struct inode *inode, struct bio *bio, int mirror_num, + unsigned long bio_flags, u64 bio_offset, + extent_submit_bio_hook_t *submit_bio_start, + extent_submit_bio_hook_t *submit_bio_done) { struct async_submit_bio *async; async = kmalloc(sizeof(*async), GFP_NOFS); if (!async) - return -ENOMEM; + return BLK_STS_RESOURCE; async->inode = inode; async->bio = bio; @@ -941,7 +940,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->bio_flags = bio_flags; async->bio_offset = bio_offset; - async->error = 0; + async->status = 0; atomic_inc(&fs_info->nr_async_submits); @@ -959,7 +958,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, return 0; } -static int btree_csum_one_bio(struct bio *bio) +static blk_status_t btree_csum_one_bio(struct bio *bio) { struct bio_vec *bvec; struct btrfs_root *root; @@ -972,12 +971,12 @@ static int btree_csum_one_bio(struct bio *bio) break; } - return ret; + return errno_to_blk_status(ret); } -static int __btree_submit_bio_start(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) +static blk_status_t __btree_submit_bio_start(struct inode *inode, + struct bio *bio, int mirror_num, unsigned long bio_flags, + u64 bio_offset) { /* * when we're called for a write, we're already in the async @@ -986,11 +985,11 @@ static int __btree_submit_bio_start(struct inode *inode, struct bio *bio, return btree_csum_one_bio(bio); } -static int __btree_submit_bio_done(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) +static blk_status_t __btree_submit_bio_done(struct inode *inode, + struct bio *bio, int mirror_num, unsigned long bio_flags, + u64 bio_offset) { - int ret; + blk_status_t ret; /* * when we're called for a write, we're already in the async @@ -998,7 +997,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio, */ ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1); if (ret) { - bio->bi_error = ret; + bio->bi_status = ret; bio_endio(bio); } return ret; @@ -1015,13 +1014,13 @@ static int check_async_write(unsigned long bio_flags) return 1; } -static int btree_submit_bio_hook(struct inode *inode, struct bio *bio, +static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int async = check_async_write(bio_flags); - int ret; + blk_status_t ret; if (bio_op(bio) != REQ_OP_WRITE) { /* @@ -1054,7 +1053,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio, return 0; out_w_error: - bio->bi_error = ret; + bio->bi_status = ret; bio_endio(bio); return ret; } @@ -1820,7 +1819,7 @@ static void end_workqueue_fn(struct btrfs_work *work) end_io_wq = container_of(work, struct btrfs_end_io_wq, work); bio = end_io_wq->bio; - bio->bi_error = end_io_wq->error; + bio->bi_status = end_io_wq->status; bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); @@ -3467,10 +3466,12 @@ static int write_dev_supers(struct btrfs_device *device, * we fua the first super. The others we allow * to go down lazy. */ - if (i == 0) - ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh); - else + if (i == 0) { + ret = btrfsic_submit_bh(REQ_OP_WRITE, + REQ_SYNC | REQ_FUA, bh); + } else { ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); + } if (ret) errors++; } @@ -3495,11 +3496,11 @@ static void btrfs_end_empty_barrier(struct bio *bio) * any device where the flush fails with eopnotsupp are flagged as not-barrier * capable */ -static int write_dev_flush(struct btrfs_device *device, int wait) +static blk_status_t write_dev_flush(struct btrfs_device *device, int wait) { struct request_queue *q = bdev_get_queue(device->bdev); struct bio *bio; - int ret = 0; + blk_status_t ret = 0; if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) return 0; @@ -3511,8 +3512,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait) wait_for_completion(&device->flush_wait); - if (bio->bi_error) { - ret = bio->bi_error; + if (bio->bi_status) { + ret = bio->bi_status; btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS); } @@ -3531,11 +3532,11 @@ static int write_dev_flush(struct btrfs_device *device, int wait) device->flush_bio = NULL; bio = btrfs_io_bio_alloc(GFP_NOFS, 0); if (!bio) - return -ENOMEM; + return BLK_STS_RESOURCE; bio->bi_end_io = btrfs_end_empty_barrier; bio->bi_bdev = device->bdev; - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; device->flush_bio = bio; @@ -3556,7 +3557,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) struct btrfs_device *dev; int errors_send = 0; int errors_wait = 0; - int ret; + blk_status_t ret; /* send down all the barriers */ head = &info->fs_devices->devices; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 21f1ceb85b76..c581927555f3 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -118,13 +118,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(const char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, u8 *result); -int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, +blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata); -int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, - struct bio *bio, int mirror_num, - unsigned long bio_flags, u64 bio_offset, - extent_submit_bio_hook_t *submit_bio_start, - extent_submit_bio_hook_t *submit_bio_done); +blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, + struct inode *inode, struct bio *bio, int mirror_num, + unsigned long bio_flags, u64 bio_offset, + extent_submit_bio_hook_t *submit_bio_start, + extent_submit_bio_hook_t *submit_bio_done); unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); int btrfs_write_tree_block(struct extent_buffer *buf); int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e390451c72e6..33d979e9ea2a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3993,6 +3993,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, info->space_info_kobj, "%s", alloc_name(found->flags)); if (ret) { + percpu_counter_destroy(&found->total_bytes_pinned); kfree(found); return ret; } @@ -4844,7 +4845,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, spin_unlock(&delayed_rsv->lock); commit: - trans = btrfs_join_transaction(fs_info->fs_root); + trans = btrfs_join_transaction(fs_info->extent_root); if (IS_ERR(trans)) return -ENOSPC; @@ -4862,7 +4863,7 @@ static int flush_space(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 num_bytes, u64 orig_bytes, int state) { - struct btrfs_root *root = fs_info->fs_root; + struct btrfs_root *root = fs_info->extent_root; struct btrfs_trans_handle *trans; int nr; int ret = 0; @@ -5062,7 +5063,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, int flush_state = FLUSH_DELAYED_ITEMS_NR; spin_lock(&space_info->lock); - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, + to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->extent_root, space_info); if (!to_reclaim) { spin_unlock(&space_info->lock); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d8da3edf2ac3..d1cd60140817 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -174,7 +174,8 @@ int __init extent_io_init(void) goto free_state_cache; btrfs_bioset = bioset_create(BIO_POOL_SIZE, - offsetof(struct btrfs_io_bio, bio)); + offsetof(struct btrfs_io_bio, bio), + BIOSET_NEED_BVECS); if (!btrfs_bioset) goto free_buffer_cache; @@ -2399,6 +2400,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct bio *bio; int read_mode = 0; + blk_status_t status; int ret; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -2431,11 +2433,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", read_mode, failrec->this_mirror, failrec->in_validation); - ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, + status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, failrec->bio_flags, 0); - if (ret) { + if (status) { free_io_failure(BTRFS_I(inode), failrec); bio_put(bio); + ret = blk_status_to_errno(status); } return ret; @@ -2458,7 +2461,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end) if (!uptodate) { ClearPageUptodate(page); SetPageError(page); - ret = ret < 0 ? ret : -EIO; + ret = err < 0 ? err : -EIO; mapping_set_error(page->mapping, ret); } } @@ -2474,6 +2477,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end) */ static void end_bio_extent_writepage(struct bio *bio) { + int error = blk_status_to_errno(bio->bi_status); struct bio_vec *bvec; u64 start; u64 end; @@ -2503,7 +2507,7 @@ static void end_bio_extent_writepage(struct bio *bio) start = page_offset(page); end = start + bvec->bv_offset + bvec->bv_len - 1; - end_extent_writepage(page, bio->bi_error, start, end); + end_extent_writepage(page, error, start, end); end_page_writeback(page); } @@ -2536,7 +2540,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, static void end_bio_extent_readpage(struct bio *bio) { struct bio_vec *bvec; - int uptodate = !bio->bi_error; + int uptodate = !bio->bi_status; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct extent_io_tree *tree; u64 offset = 0; @@ -2556,7 +2560,7 @@ static void end_bio_extent_readpage(struct bio *bio) btrfs_debug(fs_info, "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u", - (u64)bio->bi_iter.bi_sector, bio->bi_error, + (u64)bio->bi_iter.bi_sector, bio->bi_status, io_bio->mirror_num); tree = &BTRFS_I(inode)->io_tree; @@ -2615,7 +2619,7 @@ static void end_bio_extent_readpage(struct bio *bio) ret = bio_readpage_error(bio, offset, page, start, end, mirror); if (ret == 0) { - uptodate = !bio->bi_error; + uptodate = !bio->bi_status; offset += len; continue; } @@ -2673,7 +2677,7 @@ readpage_ok: endio_readpage_release_extent(tree, extent_start, extent_len, uptodate); if (io_bio->end_io) - io_bio->end_io(io_bio, bio->bi_error); + io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status)); bio_put(bio); } @@ -2743,7 +2747,7 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) static int __must_check submit_one_bio(struct bio *bio, int mirror_num, unsigned long bio_flags) { - int ret = 0; + blk_status_t ret = 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct page *page = bvec->bv_page; struct extent_io_tree *tree = bio->bi_private; @@ -2761,7 +2765,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, btrfsic_submit_bio(bio); bio_put(bio); - return ret; + return blk_status_to_errno(ret); } static int merge_bio(struct extent_io_tree *tree, struct page *page, @@ -2826,6 +2830,7 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree, bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; + bio->bi_write_hint = page->mapping->host->i_write_hint; bio_set_op_attrs(bio, op, op_flags); if (wbc) { wbc_init_bio(wbc, bio); @@ -3707,7 +3712,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio) BUG_ON(!eb); done = atomic_dec_and_test(&eb->io_pages); - if (bio->bi_error || + if (bio->bi_status || test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) { ClearPageUptodate(page); set_btree_ioerr(page); @@ -4377,6 +4382,123 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, return NULL; } +/* + * To cache previous fiemap extent + * + * Will be used for merging fiemap extent + */ +struct fiemap_cache { + u64 offset; + u64 phys; + u64 len; + u32 flags; + bool cached; +}; + +/* + * Helper to submit fiemap extent. + * + * Will try to merge current fiemap extent specified by @offset, @phys, + * @len and @flags with cached one. + * And only when we fails to merge, cached one will be submitted as + * fiemap extent. + * + * Return value is the same as fiemap_fill_next_extent(). + */ +static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, + struct fiemap_cache *cache, + u64 offset, u64 phys, u64 len, u32 flags) +{ + int ret = 0; + + if (!cache->cached) + goto assign; + + /* + * Sanity check, extent_fiemap() should have ensured that new + * fiemap extent won't overlap with cahced one. + * Not recoverable. + * + * NOTE: Physical address can overlap, due to compression + */ + if (cache->offset + cache->len > offset) { + WARN_ON(1); + return -EINVAL; + } + + /* + * Only merges fiemap extents if + * 1) Their logical addresses are continuous + * + * 2) Their physical addresses are continuous + * So truly compressed (physical size smaller than logical size) + * extents won't get merged with each other + * + * 3) Share same flags except FIEMAP_EXTENT_LAST + * So regular extent won't get merged with prealloc extent + */ + if (cache->offset + cache->len == offset && + cache->phys + cache->len == phys && + (cache->flags & ~FIEMAP_EXTENT_LAST) == + (flags & ~FIEMAP_EXTENT_LAST)) { + cache->len += len; + cache->flags |= flags; + goto try_submit_last; + } + + /* Not mergeable, need to submit cached one */ + ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, + cache->len, cache->flags); + cache->cached = false; + if (ret) + return ret; +assign: + cache->cached = true; + cache->offset = offset; + cache->phys = phys; + cache->len = len; + cache->flags = flags; +try_submit_last: + if (cache->flags & FIEMAP_EXTENT_LAST) { + ret = fiemap_fill_next_extent(fieinfo, cache->offset, + cache->phys, cache->len, cache->flags); + cache->cached = false; + } + return ret; +} + +/* + * Sanity check for fiemap cache + * + * All fiemap cache should be submitted by emit_fiemap_extent() + * Iteration should be terminated either by last fiemap extent or + * fieinfo->fi_extents_max. + * So no cached fiemap should exist. + */ +static int check_fiemap_cache(struct btrfs_fs_info *fs_info, + struct fiemap_extent_info *fieinfo, + struct fiemap_cache *cache) +{ + int ret; + + if (!cache->cached) + return 0; + + /* Small and recoverbale problem, only to info developer */ +#ifdef CONFIG_BTRFS_DEBUG + WARN_ON(1); +#endif + btrfs_warn(fs_info, + "unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x", + cache->offset, cache->phys, cache->len, cache->flags); + ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, + cache->len, cache->flags); + cache->cached = false; + if (ret > 0) + ret = 0; + return ret; +} + int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len, get_extent_t *get_extent) { @@ -4394,6 +4516,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, struct extent_state *cached_state = NULL; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; + struct fiemap_cache cache = { 0 }; int end = 0; u64 em_start = 0; u64 em_len = 0; @@ -4573,8 +4696,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags |= FIEMAP_EXTENT_LAST; end = 1; } - ret = fiemap_fill_next_extent(fieinfo, em_start, disko, - em_len, flags); + ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko, + em_len, flags); if (ret) { if (ret == 1) ret = 0; @@ -4582,6 +4705,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } } out_free: + if (!ret) + ret = check_fiemap_cache(root->fs_info, fieinfo, &cache); free_extent_map(em); out: btrfs_free_path(path); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1eafa2f0ede3..487ca0207cb6 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -92,9 +92,9 @@ struct btrfs_inode; struct btrfs_io_bio; struct io_failure_record; -typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset); +typedef blk_status_t (extent_submit_bio_hook_t)(struct inode *inode, + struct bio *bio, int mirror_num, unsigned long bio_flags, + u64 bio_offset); struct extent_io_ops { /* * The following callbacks must be allways defined, the function diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 64fcb31d7163..5b1c7090e546 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -160,7 +160,7 @@ static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err) kfree(bio->csum_allocated); } -static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, +static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u64 logical_offset, u32 *dst, int dio) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, path = btrfs_alloc_path(); if (!path) - return -ENOMEM; + return BLK_STS_RESOURCE; nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits; if (!dst) { @@ -191,7 +191,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, csum_size, GFP_NOFS); if (!btrfs_bio->csum_allocated) { btrfs_free_path(path); - return -ENOMEM; + return BLK_STS_RESOURCE; } btrfs_bio->csum = btrfs_bio->csum_allocated; btrfs_bio->end_io = btrfs_io_bio_endio_readpage; @@ -303,12 +303,12 @@ next: return 0; } -int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst) +blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst) { return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0); } -int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset) +blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset) { return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1); } @@ -433,7 +433,7 @@ fail: return ret; } -int btrfs_csum_one_bio(struct inode *inode, struct bio *bio, +blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, u64 file_start, int contig) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -452,7 +452,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio, sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), GFP_NOFS); if (!sums) - return -ENOMEM; + return BLK_STS_RESOURCE; sums->len = bio->bi_iter.bi_size; INIT_LIST_HEAD(&sums->list); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index da1096eb1a40..59e2dccdf75b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1875,12 +1875,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, ssize_t num_written = 0; bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); ssize_t err; - loff_t pos; - size_t count; + loff_t pos = iocb->ki_pos; + size_t count = iov_iter_count(from); loff_t oldsize; int clean_page = 0; - inode_lock(inode); + if ((iocb->ki_flags & IOCB_NOWAIT) && + (iocb->ki_flags & IOCB_DIRECT)) { + /* Don't sleep on inode rwsem */ + if (!inode_trylock(inode)) + return -EAGAIN; + /* + * We will allocate space in case nodatacow is not set, + * so bail + */ + if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | + BTRFS_INODE_PREALLOC)) || + check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) { + inode_unlock(inode); + return -EAGAIN; + } + } else + inode_lock(inode); + err = generic_write_checks(iocb, from); if (err <= 0) { inode_unlock(inode); @@ -1914,8 +1931,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, */ update_time_for_write(inode); - pos = iocb->ki_pos; - count = iov_iter_count(from); start_pos = round_down(pos, fs_info->sectorsize); oldsize = i_size_read(inode); if (start_pos > oldsize) { @@ -3071,13 +3086,19 @@ out: return offset; } +static int btrfs_file_open(struct inode *inode, struct file *filp) +{ + filp->f_mode |= FMODE_AIO_NOWAIT; + return generic_file_open(inode, filp); +} + const struct file_operations btrfs_file_operations = { .llseek = btrfs_file_llseek, .read_iter = generic_file_read_iter, .splice_read = generic_file_splice_read, .write_iter = btrfs_file_write_iter, .mmap = btrfs_file_mmap, - .open = generic_file_open, + .open = btrfs_file_open, .release = btrfs_release_file, .fsync = btrfs_sync_file, .fallocate = btrfs_fallocate, diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index a97fdc156a03..baacc1866861 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -38,6 +38,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length) { SHASH_DESC_ON_STACK(shash, tfm); u32 *ctx = (u32 *)shash_desc_ctx(shash); + u32 retval; int err; shash->tfm = tfm; @@ -47,5 +48,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length) err = crypto_shash_update(shash, address, length); BUG_ON(err); - return *ctx; + retval = *ctx; + barrier_data(ctx); + return retval; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 17cbe9306faf..556c93060606 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -842,13 +842,12 @@ retry: NULL, EXTENT_LOCKED | EXTENT_DELALLOC, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK); - ret = btrfs_submit_compressed_write(inode, + if (btrfs_submit_compressed_write(inode, async_extent->start, async_extent->ram_size, ins.objectid, ins.offset, async_extent->pages, - async_extent->nr_pages); - if (ret) { + async_extent->nr_pages)) { struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct page *p = async_extent->pages[0]; const u64 start = async_extent->start; @@ -1901,11 +1900,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) +static blk_status_t __btrfs_submit_bio_start(struct inode *inode, + struct bio *bio, int mirror_num, unsigned long bio_flags, + u64 bio_offset) { - int ret = 0; + blk_status_t ret = 0; ret = btrfs_csum_one_bio(inode, bio, 0, 0); BUG_ON(ret); /* -ENOMEM */ @@ -1920,16 +1919,16 @@ static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) +static blk_status_t __btrfs_submit_bio_done(struct inode *inode, + struct bio *bio, int mirror_num, unsigned long bio_flags, + u64 bio_offset) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - int ret; + blk_status_t ret; ret = btrfs_map_bio(fs_info, bio, mirror_num, 1); if (ret) { - bio->bi_error = ret; + bio->bi_status = ret; bio_endio(bio); } return ret; @@ -1939,14 +1938,14 @@ static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio, * extent_io.c submission hook. This does the right thing for csum calculation * on write, or reading the csums from the tree before a read */ -static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, +static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA; - int ret = 0; + blk_status_t ret = 0; int skip_sum; int async = !atomic_read(&BTRFS_I(inode)->sync_writers); @@ -1991,8 +1990,8 @@ mapit: ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); out: - if (ret < 0) { - bio->bi_error = ret; + if (ret) { + bio->bi_status = ret; bio_endio(bio); } return ret; @@ -2952,7 +2951,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ret = test_range_bit(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, - EXTENT_DEFRAG, 1, cached_state); + EXTENT_DEFRAG, 0, cached_state); if (ret) { u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item); if (0 && last_snapshot >= BTRFS_I(inode)->generation) @@ -7483,8 +7482,8 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) int found = false; void **pagep = NULL; struct page *page = NULL; - int start_idx; - int end_idx; + unsigned long start_idx; + unsigned long end_idx; start_idx = start >> PAGE_SHIFT; @@ -8037,7 +8036,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio) struct bio_vec *bvec; int i; - if (bio->bi_error) + if (bio->bi_status) goto end; ASSERT(bio->bi_vcnt == 1); @@ -8116,7 +8115,7 @@ static void btrfs_retry_endio(struct bio *bio) int ret; int i; - if (bio->bi_error) + if (bio->bi_status) goto end; uptodate = 1; @@ -8141,8 +8140,8 @@ end: bio_put(bio); } -static int __btrfs_subio_endio_read(struct inode *inode, - struct btrfs_io_bio *io_bio, int err) +static blk_status_t __btrfs_subio_endio_read(struct inode *inode, + struct btrfs_io_bio *io_bio, blk_status_t err) { struct btrfs_fs_info *fs_info; struct bio_vec *bvec; @@ -8184,7 +8183,7 @@ try_again: io_bio->mirror_num, btrfs_retry_endio, &done); if (ret) { - err = ret; + err = errno_to_blk_status(ret); goto next; } @@ -8211,8 +8210,8 @@ next: return err; } -static int btrfs_subio_endio_read(struct inode *inode, - struct btrfs_io_bio *io_bio, int err) +static blk_status_t btrfs_subio_endio_read(struct inode *inode, + struct btrfs_io_bio *io_bio, blk_status_t err) { bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; @@ -8232,7 +8231,7 @@ static void btrfs_endio_direct_read(struct bio *bio) struct inode *inode = dip->inode; struct bio *dio_bio; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); - int err = bio->bi_error; + blk_status_t err = bio->bi_status; if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) err = btrfs_subio_endio_read(inode, io_bio, err); @@ -8243,11 +8242,11 @@ static void btrfs_endio_direct_read(struct bio *bio) kfree(dip); - dio_bio->bi_error = bio->bi_error; - dio_end_io(dio_bio, bio->bi_error); + dio_bio->bi_status = bio->bi_status; + dio_end_io(dio_bio); if (io_bio->end_io) - io_bio->end_io(io_bio, err); + io_bio->end_io(io_bio, blk_status_to_errno(err)); bio_put(bio); } @@ -8299,20 +8298,20 @@ static void btrfs_endio_direct_write(struct bio *bio) struct bio *dio_bio = dip->dio_bio; __endio_write_update_ordered(dip->inode, dip->logical_offset, - dip->bytes, !bio->bi_error); + dip->bytes, !bio->bi_status); kfree(dip); - dio_bio->bi_error = bio->bi_error; - dio_end_io(dio_bio, bio->bi_error); + dio_bio->bi_status = bio->bi_status; + dio_end_io(dio_bio); bio_put(bio); } -static int __btrfs_submit_bio_start_direct_io(struct inode *inode, +static blk_status_t __btrfs_submit_bio_start_direct_io(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 offset) { - int ret; + blk_status_t ret; ret = btrfs_csum_one_bio(inode, bio, offset, 1); BUG_ON(ret); /* -ENOMEM */ return 0; @@ -8321,7 +8320,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, static void btrfs_end_dio_bio(struct bio *bio) { struct btrfs_dio_private *dip = bio->bi_private; - int err = bio->bi_error; + blk_status_t err = bio->bi_status; if (err) btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, @@ -8351,7 +8350,7 @@ static void btrfs_end_dio_bio(struct bio *bio) if (dip->errors) { bio_io_error(dip->orig_bio); } else { - dip->dio_bio->bi_error = 0; + dip->dio_bio->bi_status = 0; bio_endio(dip->orig_bio); } out: @@ -8368,14 +8367,14 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, return bio; } -static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode, +static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, struct btrfs_dio_private *dip, struct bio *bio, u64 file_offset) { struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); - int ret; + blk_status_t ret; /* * We load all the csum data we need when we submit @@ -8406,7 +8405,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_dio_private *dip = bio->bi_private; bool write = bio_op(bio) == REQ_OP_WRITE; - int ret; + blk_status_t ret; if (async_submit) async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers); @@ -8649,7 +8648,7 @@ free_ordered: * callbacks - they require an allocated dip and a clone of dio_bio. */ if (io_bio && dip) { - io_bio->bi_error = -EIO; + io_bio->bi_status = BLK_STS_IOERR; bio_endio(io_bio); /* * The end io callbacks free our dip, do the final put on io_bio @@ -8668,12 +8667,12 @@ free_ordered: unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, file_offset + dio_bio->bi_iter.bi_size - 1); - dio_bio->bi_error = -EIO; + dio_bio->bi_status = BLK_STS_IOERR; /* * Releases and cleans up our dio_bio, no need to bio_put() * nor bio_endio()/bio_io_error() against dio_bio. */ - dio_end_io(dio_bio, ret); + dio_end_io(dio_bio); } if (io_bio) bio_put(io_bio); @@ -8755,6 +8754,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) dio_data.overwrite = 1; inode_unlock(inode); relock = true; + } else if (iocb->ki_flags & IOCB_NOWAIT) { + ret = -EAGAIN; + goto out; } ret = btrfs_delalloc_reserve_space(inode, offset, count); if (ret) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index d8ea0eb76325..f3d30d9ea8f9 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -871,7 +871,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio) * this frees the rbio and runs through all the bios in the * bio_list and calls end_io on them */ -static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err) +static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err) { struct bio *cur = bio_list_get(&rbio->bio_list); struct bio *next; @@ -884,7 +884,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err) while (cur) { next = cur->bi_next; cur->bi_next = NULL; - cur->bi_error = err; + cur->bi_status = err; bio_endio(cur); cur = next; } @@ -897,7 +897,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err) static void raid_write_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; - int err = bio->bi_error; + blk_status_t err = bio->bi_status; int max_errors; if (err) @@ -914,7 +914,7 @@ static void raid_write_end_io(struct bio *bio) max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ? 0 : rbio->bbio->max_errors; if (atomic_read(&rbio->error) > max_errors) - err = -EIO; + err = BLK_STS_IOERR; rbio_orig_end_io(rbio, err); } @@ -1092,7 +1092,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, * devices or if they are not contiguous */ if (last_end == disk_start && stripe->dev->bdev && - !last->bi_error && + !last->bi_status && last->bi_bdev == stripe->dev->bdev) { ret = bio_add_page(last, page, PAGE_SIZE, 0); if (ret == PAGE_SIZE) @@ -1448,7 +1448,7 @@ static void raid_rmw_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; - if (bio->bi_error) + if (bio->bi_status) fail_bio_stripe(rbio, bio); else set_bio_pages_uptodate(bio); @@ -1991,7 +1991,7 @@ static void raid_recover_end_io(struct bio *bio) * we only read stripe pages off the disk, set them * up to date if there were no errors */ - if (bio->bi_error) + if (bio->bi_status) fail_bio_stripe(rbio, bio); else set_bio_pages_uptodate(bio); @@ -2530,7 +2530,7 @@ static void raid56_parity_scrub_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; - if (bio->bi_error) + if (bio->bi_status) fail_bio_stripe(rbio, bio); else set_bio_pages_uptodate(bio); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index c7b45eb2403d..ba5595d19de1 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -95,7 +95,7 @@ struct scrub_bio { struct scrub_ctx *sctx; struct btrfs_device *dev; struct bio *bio; - int err; + blk_status_t status; u64 logical; u64 physical; #if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO @@ -1668,14 +1668,14 @@ leave_nomem: struct scrub_bio_ret { struct completion event; - int error; + blk_status_t status; }; static void scrub_bio_wait_endio(struct bio *bio) { struct scrub_bio_ret *ret = bio->bi_private; - ret->error = bio->bi_error; + ret->status = bio->bi_status; complete(&ret->event); } @@ -1693,7 +1693,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, int ret; init_completion(&done.event); - done.error = 0; + done.status = 0; bio->bi_iter.bi_sector = page->logical >> 9; bio->bi_private = &done; bio->bi_end_io = scrub_bio_wait_endio; @@ -1705,7 +1705,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, return ret; wait_for_completion(&done.event); - if (done.error) + if (done.status) return -EIO; return 0; @@ -1937,7 +1937,7 @@ again: bio->bi_bdev = sbio->dev->bdev; bio->bi_iter.bi_sector = sbio->physical >> 9; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - sbio->err = 0; + sbio->status = 0; } else if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical_for_dev_replace || sbio->logical + sbio->page_count * PAGE_SIZE != @@ -1992,7 +1992,7 @@ static void scrub_wr_bio_end_io(struct bio *bio) struct scrub_bio *sbio = bio->bi_private; struct btrfs_fs_info *fs_info = sbio->dev->fs_info; - sbio->err = bio->bi_error; + sbio->status = bio->bi_status; sbio->bio = bio; btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, @@ -2007,7 +2007,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) int i; WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO); - if (sbio->err) { + if (sbio->status) { struct btrfs_dev_replace *dev_replace = &sbio->sctx->fs_info->dev_replace; @@ -2341,7 +2341,7 @@ again: bio->bi_bdev = sbio->dev->bdev; bio->bi_iter.bi_sector = sbio->physical >> 9; bio_set_op_attrs(bio, REQ_OP_READ, 0); - sbio->err = 0; + sbio->status = 0; } else if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical || sbio->logical + sbio->page_count * PAGE_SIZE != @@ -2377,7 +2377,7 @@ static void scrub_missing_raid56_end_io(struct bio *bio) struct scrub_block *sblock = bio->bi_private; struct btrfs_fs_info *fs_info = sblock->sctx->fs_info; - if (bio->bi_error) + if (bio->bi_status) sblock->no_io_error_seen = 0; bio_put(bio); @@ -2588,7 +2588,7 @@ static void scrub_bio_end_io(struct bio *bio) struct scrub_bio *sbio = bio->bi_private; struct btrfs_fs_info *fs_info = sbio->dev->fs_info; - sbio->err = bio->bi_error; + sbio->status = bio->bi_status; sbio->bio = bio; btrfs_queue_work(fs_info->scrub_workers, &sbio->work); @@ -2601,7 +2601,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) int i; BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO); - if (sbio->err) { + if (sbio->status) { for (i = 0; i < sbio->page_count; i++) { struct scrub_page *spage = sbio->pagev[i]; @@ -3004,7 +3004,7 @@ static void scrub_parity_bio_endio(struct bio *bio) struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private; struct btrfs_fs_info *fs_info = sparity->sctx->fs_info; - if (bio->bi_error) + if (bio->bi_status) bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, sparity->nsectors); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 017b67daa3bb..84a495967e0a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6042,9 +6042,10 @@ static void btrfs_end_bio(struct bio *bio) struct btrfs_bio *bbio = bio->bi_private; int is_orig_bio = 0; - if (bio->bi_error) { + if (bio->bi_status) { atomic_inc(&bbio->error); - if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) { + if (bio->bi_status == BLK_STS_IOERR || + bio->bi_status == BLK_STS_TARGET) { unsigned int stripe_index = btrfs_io_bio(bio)->stripe_index; struct btrfs_device *dev; @@ -6082,13 +6083,13 @@ static void btrfs_end_bio(struct bio *bio) * beyond the tolerance of the btrfs bio */ if (atomic_read(&bbio->error) > bbio->max_errors) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; } else { /* * this bio is actually up to date, we didn't * go over the max number of errors */ - bio->bi_error = 0; + bio->bi_status = 0; } btrfs_end_bbio(bbio, bio); @@ -6199,7 +6200,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; bio->bi_iter.bi_sector = logical >> 9; - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; btrfs_end_bbio(bbio, bio); } } diff --git a/fs/buffer.c b/fs/buffer.c index 161be58c5cb0..5c2cba8d2387 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -49,7 +49,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, - struct writeback_control *wbc); + enum rw_hint hint, struct writeback_control *wbc); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) @@ -1829,7 +1829,8 @@ int __block_write_full_page(struct inode *inode, struct page *page, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); + submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, + inode->i_write_hint, wbc); nr_underway++; } bh = next; @@ -1883,7 +1884,8 @@ recover: struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); + submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, + inode->i_write_hint, wbc); nr_underway++; } bh = next; @@ -3038,7 +3040,7 @@ static void end_bio_bh_io_sync(struct bio *bio) if (unlikely(bio_flagged(bio, BIO_QUIET))) set_bit(BH_Quiet, &bh->b_state); - bh->b_end_io(bh, !bio->bi_error); + bh->b_end_io(bh, !bio->bi_status); bio_put(bio); } @@ -3091,7 +3093,7 @@ void guard_bio_eod(int op, struct bio *bio) } static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, - struct writeback_control *wbc) + enum rw_hint write_hint, struct writeback_control *wbc) { struct bio *bio; @@ -3120,6 +3122,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_bdev = bh->b_bdev; + bio->bi_write_hint = write_hint; bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); BUG_ON(bio->bi_iter.bi_size != bh->b_size); @@ -3142,7 +3145,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, int submit_bh(int op, int op_flags, struct buffer_head *bh) { - return submit_bh_wbc(op, op_flags, bh, NULL); + return submit_bh_wbc(op, op_flags, bh, 0, NULL); } EXPORT_SYMBOL(submit_bh); diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 9bf90bcc56ac..bb3a02ca9da4 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -18,7 +18,7 @@ #include <linux/fscache-cache.h> #include <linux/timer.h> -#include <linux/wait.h> +#include <linux/wait_bit.h> #include <linux/cred.h> #include <linux/workqueue.h> #include <linux/security.h> @@ -97,7 +97,7 @@ struct cachefiles_cache { * backing file read tracking */ struct cachefiles_one_read { - wait_queue_t monitor; /* link into monitored waitqueue */ + wait_queue_entry_t monitor; /* link into monitored waitqueue */ struct page *back_page; /* backing file page we're waiting for */ struct page *netfs_page; /* netfs page we're going to fill */ struct fscache_retrieval *op; /* retrieval op covering this */ diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 41df8a27d7eb..3978b324cbca 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -204,7 +204,7 @@ wait_for_old_object: wait_queue_head_t *wq; signed long timeout = 60 * HZ; - wait_queue_t wait; + wait_queue_entry_t wait; bool requeue; /* if the object we're waiting for is queued for processing, diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index afbdc418966d..18d7aa61ef0f 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -21,7 +21,7 @@ * - we use this to detect read completion of backing pages * - the caller holds the waitqueue lock */ -static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, +static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, int sync, void *_key) { struct cachefiles_one_read *monitor = @@ -48,7 +48,7 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, } /* remove from the waitqueue */ - list_del(&wait->task_list); + list_del(&wait->entry); /* move onto the action list and queue for FS-Cache thread pool */ ASSERT(monitor->op); diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 987044bca1c2..59cb307b15fb 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -131,6 +131,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) } if (new_mode != old_mode) { + newattrs.ia_ctime = current_time(inode); newattrs.ia_mode = new_mode; newattrs.ia_valid = ATTR_MODE; ret = __ceph_setattr(inode, &newattrs); diff --git a/fs/ceph/export.c b/fs/ceph/export.c index e8f11fa565c5..7df550c13d7f 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -91,6 +91,10 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) ceph_mdsc_put_request(req); if (!inode) return ERR_PTR(-ESTALE); + if (inode->i_nlink == 0) { + iput(inode); + return ERR_PTR(-ESTALE); + } } return d_obtain_alias(inode); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index dcce79b84406..4de6cdddf059 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2022,7 +2022,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) attr->ia_size > inode->i_size) { i_size_write(inode, attr->ia_size); inode->i_blocks = calc_inode_blocks(attr->ia_size); - inode->i_ctime = attr->ia_ctime; ci->i_reported_size = attr->ia_size; dirtied |= CEPH_CAP_FILE_EXCL; } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || @@ -2044,7 +2043,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, only ? "ctime only" : "ignored"); - inode->i_ctime = attr->ia_ctime; if (only) { /* * if kernel wants to dirty ctime but nothing else, @@ -2067,7 +2065,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) if (dirtied) { inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, &prealloc_cf); - inode->i_ctime = current_time(inode); + inode->i_ctime = attr->ia_ctime; } release &= issued; @@ -2085,6 +2083,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) req->r_inode_drop = release; req->r_args.setattr.mask = cpu_to_le32(mask); req->r_num_caps = 1; + req->r_stamp = attr->ia_ctime; err = ceph_mdsc_do_request(mdsc, NULL, req); } dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f38e56fa9712..0c05df44cc6c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1687,7 +1687,6 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS); - struct timespec ts; if (!req) return ERR_PTR(-ENOMEM); @@ -1706,8 +1705,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item); - ktime_get_real_ts(&ts); - req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran); + req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran); req->r_op = op; req->r_direct_mode = mode; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0fd081bd2a2f..fcef70602b27 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3271,7 +3271,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) if (!is_sync_kiocb(iocb)) ctx->iocb = iocb; - if (to->type & ITER_IOVEC) + if (to->type == ITER_IOVEC) ctx->should_dirty = true; rc = setup_aio_ctx_iter(ctx, to, READ); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 4d1fcd76d022..a8693632235f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -24,6 +24,7 @@ #include <linux/pagemap.h> #include <linux/freezer.h> #include <linux/sched/signal.h> +#include <linux/wait_bit.h> #include <asm/div64.h> #include "cifsfs.h" diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index b08531977daa..3b147dc6af63 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -810,7 +810,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw) if (!pages) { pages = vmalloc(max_pages * sizeof(struct page *)); - if (!bv) { + if (!pages) { kvfree(bv); return -ENOMEM; } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 27bc360c7ffd..a723df3e0197 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid, __u16 search_flags, struct cifs_search_info *srch_inf) { - return CIFSFindFirst(xid, tcon, path, cifs_sb, - &fid->netfid, search_flags, srch_inf, true); + int rc; + + rc = CIFSFindFirst(xid, tcon, path, cifs_sb, + &fid->netfid, search_flags, srch_inf, true); + if (rc) + cifs_dbg(FYI, "find first failed=%d\n", rc); + return rc; } static int diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c58691834eb2..7e48561abd29 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -982,7 +982,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL); kfree(utf16_path); if (rc) { - cifs_dbg(VFS, "open dir failed\n"); + cifs_dbg(FYI, "open dir failed rc=%d\n", rc); return rc; } @@ -992,7 +992,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, fid->volatile_fid, 0, srch_inf); if (rc) { - cifs_dbg(VFS, "query directory failed\n"); + cifs_dbg(FYI, "query directory failed rc=%d\n", rc); SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); } return rc; @@ -1809,7 +1809,8 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) sg = init_sg(rqst, sign); if (!sg) { - cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc); + cifs_dbg(VFS, "%s: Failed to init sg", __func__); + rc = -ENOMEM; goto free_req; } @@ -1817,6 +1818,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) iv = kzalloc(iv_len, GFP_KERNEL); if (!iv) { cifs_dbg(VFS, "%s: Failed to alloc IV", __func__); + rc = -ENOMEM; goto free_sg; } iv[0] = 3; diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 3cb5c9e2d4e7..de50e749ff05 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -188,8 +188,6 @@ static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode, pcreatetime = (__u64 *)value; *pcreatetime = CIFS_I(inode)->createtime; return sizeof(__u64); - - return rc; } diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 6116d5275a3e..112b3e1e20e3 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -866,8 +866,6 @@ COMPATIBLE_IOCTL(TIOCGDEV) COMPATIBLE_IOCTL(TIOCCBRK) COMPATIBLE_IOCTL(TIOCGSID) COMPATIBLE_IOCTL(TIOCGICOUNT) -COMPATIBLE_IOCTL(TIOCGPKT) -COMPATIBLE_IOCTL(TIOCGPTLCK) COMPATIBLE_IOCTL(TIOCGEXCL) /* Little t */ COMPATIBLE_IOCTL(TIOCGETD) @@ -883,16 +881,12 @@ COMPATIBLE_IOCTL(TIOCMGET) COMPATIBLE_IOCTL(TIOCMBIC) COMPATIBLE_IOCTL(TIOCMBIS) COMPATIBLE_IOCTL(TIOCMSET) -COMPATIBLE_IOCTL(TIOCPKT) COMPATIBLE_IOCTL(TIOCNOTTY) COMPATIBLE_IOCTL(TIOCSTI) COMPATIBLE_IOCTL(TIOCOUTQ) COMPATIBLE_IOCTL(TIOCSPGRP) COMPATIBLE_IOCTL(TIOCGPGRP) -COMPATIBLE_IOCTL(TIOCGPTN) -COMPATIBLE_IOCTL(TIOCSPTLCK) COMPATIBLE_IOCTL(TIOCSERGETLSR) -COMPATIBLE_IOCTL(TIOCSIG) #ifdef TIOCSRS485 COMPATIBLE_IOCTL(TIOCSRS485) #endif diff --git a/fs/configfs/item.c b/fs/configfs/item.c index 8b2a994042dd..a66f6624d899 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c @@ -138,6 +138,14 @@ struct config_item *config_item_get(struct config_item *item) } EXPORT_SYMBOL(config_item_get); +struct config_item *config_item_get_unless_zero(struct config_item *item) +{ + if (item && kref_get_unless_zero(&item->ci_kref)) + return item; + return NULL; +} +EXPORT_SYMBOL(config_item_get_unless_zero); + static void config_item_cleanup(struct config_item *item) { struct config_item_type *t = item->ci_type; diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index a6ab012a2c6a..c8aabba502f6 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -83,14 +83,13 @@ static int create_link(struct config_item *parent_item, ret = -ENOMEM; sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); if (sl) { - sl->sl_target = config_item_get(item); spin_lock(&configfs_dirent_lock); if (target_sd->s_type & CONFIGFS_USET_DROPPING) { spin_unlock(&configfs_dirent_lock); - config_item_put(item); kfree(sl); return -ENOENT; } + sl->sl_target = config_item_get(item); list_add(&sl->sl_list, &target_sd->s_links); spin_unlock(&configfs_dirent_lock); ret = configfs_create_link(sl, parent_item->ci_dentry, diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index a409a84f1bca..6181e9526860 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -129,7 +129,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, goto errout; } err = submit_bio_wait(bio); - if ((err == 0) && bio->bi_error) + if (err == 0 && bio->bi_status) err = -EIO; bio_put(bio); if (err) @@ -84,7 +84,7 @@ struct exceptional_entry_key { }; struct wait_exceptional_entry_queue { - wait_queue_t wait; + wait_queue_entry_t wait; struct exceptional_entry_key key; }; @@ -108,7 +108,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping, return wait_table + hash; } -static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode, +static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mode, int sync, void *keyp) { struct exceptional_entry_key *key = keyp; @@ -859,6 +859,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, if (ret < 0) goto out; } + start_index = indices[pvec.nr - 1] + 1; } out: put_dax(dax_dev); diff --git a/fs/dcache.c b/fs/dcache.c index cddf39777835..a9f995f6859e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1494,7 +1494,7 @@ static void check_and_drop(void *_data) { struct detach_data *data = _data; - if (!data->mountpoint && !data->select.found) + if (!data->mountpoint && list_empty(&data->select.dispose)) __d_drop(data->select.start); } @@ -1536,17 +1536,15 @@ void d_invalidate(struct dentry *dentry) d_walk(dentry, &data, detach_and_collect, check_and_drop); - if (data.select.found) + if (!list_empty(&data.select.dispose)) shrink_dentry_list(&data.select.dispose); + else if (!data.mountpoint) + return; if (data.mountpoint) { detach_mounts(data.mountpoint); dput(data.mountpoint); } - - if (!data.mountpoint && !data.select.found) - break; - cond_resched(); } } diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 354e2ab62031..6dabc4a10396 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -9,7 +9,7 @@ * 2 as published by the Free Software Foundation. * * debugfs is for people to use instead of /proc or /sys. - * See Documentation/DocBook/filesystems for more details. + * See Documentation/filesystems/ for more details. * */ diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index e892ae7d89f8..77440e4aa9d4 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -9,7 +9,7 @@ * 2 as published by the Free Software Foundation. * * debugfs is for people to use instead of /proc or /sys. - * See Documentation/DocBook/kernel-api for more details. + * See ./Documentation/core-api/kernel-api.rst for more details. * */ diff --git a/fs/direct-io.c b/fs/direct-io.c index a04ebea77de8..08cf27811e5a 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -294,7 +294,7 @@ static void dio_aio_complete_work(struct work_struct *work) dio_complete(dio, 0, true); } -static int dio_bio_complete(struct dio *dio, struct bio *bio); +static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio); /* * Asynchronous IO callback. @@ -348,13 +348,12 @@ static void dio_bio_end_io(struct bio *bio) /** * dio_end_io - handle the end io action for the given bio * @bio: The direct io bio thats being completed - * @error: Error if there was one * * This is meant to be called by any filesystem that uses their own dio_submit_t * so that the DIO specific endio actions are dealt with after the filesystem * has done it's completion work. */ -void dio_end_io(struct bio *bio, int error) +void dio_end_io(struct bio *bio) { struct dio *dio = bio->bi_private; @@ -386,6 +385,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, else bio->bi_end_io = dio_bio_end_io; + bio->bi_write_hint = dio->iocb->ki_hint; + sdio->bio = bio; sdio->logical_offset_in_bio = sdio->cur_page_fs_offset; } @@ -474,17 +475,20 @@ static struct bio *dio_await_one(struct dio *dio) /* * Process one completed BIO. No locks are held. */ -static int dio_bio_complete(struct dio *dio, struct bio *bio) +static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) { struct bio_vec *bvec; unsigned i; - int err; + blk_status_t err = bio->bi_status; - if (bio->bi_error) - dio->io_error = -EIO; + if (err) { + if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT)) + dio->io_error = -EAGAIN; + else + dio->io_error = -EIO; + } if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) { - err = bio->bi_error; bio_check_pages_dirty(bio); /* transfers ownership */ } else { bio_for_each_segment_all(bvec, bio, i) { @@ -495,7 +499,6 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) set_page_dirty_lock(page); put_page(page); } - err = bio->bi_error; bio_put(bio); } return err; @@ -539,7 +542,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio) bio = dio->bio_list; dio->bio_list = bio->bi_private; spin_unlock_irqrestore(&dio->bio_lock, flags); - ret2 = dio_bio_complete(dio, bio); + ret2 = blk_status_to_errno(dio_bio_complete(dio, bio)); if (ret == 0) ret = ret2; } @@ -1197,6 +1200,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, if (iov_iter_rw(iter) == WRITE) { dio->op = REQ_OP_WRITE; dio->op_flags = REQ_SYNC | REQ_IDLE; + if (iocb->ki_flags & IOCB_NOWAIT) + dio->op_flags |= REQ_NOWAIT; } else { dio->op = REQ_OP_READ; } diff --git a/fs/eventfd.c b/fs/eventfd.c index 68b9fffcb2c8..2fb4eadaa118 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -191,7 +191,7 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) * This is used to atomically remove a wait queue entry from the eventfd wait * queue head, and read/reset the counter value. */ -int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt) { unsigned long flags; @@ -215,8 +215,8 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); * * Returns %0 if successful, or the following error codes: * - * -EAGAIN : The operation would have blocked but @no_wait was non-zero. - * -ERESTARTSYS : A signal interrupted the wait operation. + * - -EAGAIN : The operation would have blocked but @no_wait was non-zero. + * - -ERESTARTSYS : A signal interrupted the wait operation. * * If @no_wait is zero, the function might sleep until the eventfd internal * counter becomes greater than zero. diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5420767c9b68..b1c8e23ddf65 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -244,7 +244,7 @@ struct eppoll_entry { * Wait queue item that will be linked to the target file wait * queue head. */ - wait_queue_t wait; + wait_queue_entry_t wait; /* The wait queue head that linked the "wait" wait queue item */ wait_queue_head_t *whead; @@ -347,13 +347,13 @@ static inline int ep_is_linked(struct list_head *p) return !list_empty(p); } -static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p) +static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p) { return container_of(p, struct eppoll_entry, wait); } /* Get the "struct epitem" from a wait queue pointer */ -static inline struct epitem *ep_item_from_wait(wait_queue_t *p) +static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p) { return container_of(p, struct eppoll_entry, wait)->base; } @@ -1078,7 +1078,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) * mechanism. It is called by the stored file descriptors when they * have events to report. */ -static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { int pwake = 0; unsigned long flags; @@ -1094,7 +1094,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k * can't use __remove_wait_queue(). whead->lock is held by * the caller. */ - list_del_init(&wait->task_list); + list_del_init(&wait->entry); } spin_lock_irqsave(&ep->lock, flags); @@ -1699,7 +1699,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int res = 0, eavail, timed_out = 0; unsigned long flags; u64 slack = 0; - wait_queue_t wait; + wait_queue_entry_t wait; ktime_t expires, *to = NULL; if (timeout > 0) { diff --git a/fs/exec.c b/fs/exec.c index 72934df68471..904199086490 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -220,8 +220,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, if (write) { unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; + unsigned long ptr_size; struct rlimit *rlim; + /* + * Since the stack will hold pointers to the strings, we + * must account for them as well. + * + * The size calculation is the entire vma while each arg page is + * built, so each time we get here it's calculating how far it + * is currently (rather than each call being just the newly + * added size from the arg page). As a result, we need to + * always add the entire size of the pointers, so that on the + * last call to get_arg_page() we'll actually have the entire + * correct size. + */ + ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); + if (ptr_size > ULONG_MAX - size) + goto fail; + size += ptr_size; + acct_arg_size(bprm, size / PAGE_SIZE); /* @@ -239,13 +257,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, * to work from. */ rlim = current->signal->rlim; - if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) { - put_page(page); - return NULL; - } + if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) + goto fail; } return page; + +fail: + put_page(page); + return NULL; } static void put_arg_page(struct page *page) diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index fd389935ecd1..3ec0e46de95f 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -4,6 +4,7 @@ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> */ +#include <linux/quotaops.h> #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" @@ -232,6 +233,9 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type) handle_t *handle; int error, retries = 0; + error = dquot_initialize(inode); + if (error) + return error; retry: handle = ext4_journal_start(inode, EXT4_HT_XATTR, ext4_jbd2_credits_xattr(inode)); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8e8046104f4d..32191548abed 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2523,7 +2523,6 @@ extern int ext4_search_dir(struct buffer_head *bh, int buf_size, struct inode *dir, struct ext4_filename *fname, - const struct qstr *d_name, unsigned int offset, struct ext4_dir_entry_2 **res_dir); extern int ext4_generic_delete_entry(handle_t *handle, @@ -3007,7 +3006,6 @@ extern int htree_inlinedir_to_tree(struct file *dir_file, int *has_inline_data); extern struct buffer_head *ext4_find_inline_entry(struct inode *dir, struct ext4_filename *fname, - const struct qstr *d_name, struct ext4_dir_entry_2 **res_dir, int *has_inline_data); extern int ext4_delete_inline_entry(handle_t *handle, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2a97dff87b96..3e36508610b7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3413,13 +3413,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, struct ext4_sb_info *sbi; struct ext4_extent_header *eh; struct ext4_map_blocks split_map; - struct ext4_extent zero_ex; + struct ext4_extent zero_ex1, zero_ex2; struct ext4_extent *ex, *abut_ex; ext4_lblk_t ee_block, eof_block; unsigned int ee_len, depth, map_len = map->m_len; int allocated = 0, max_zeroout = 0; int err = 0; - int split_flag = 0; + int split_flag = EXT4_EXT_DATA_VALID2; ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, @@ -3436,7 +3436,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - zero_ex.ee_len = 0; + zero_ex1.ee_len = 0; + zero_ex2.ee_len = 0; trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); @@ -3576,62 +3577,52 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, if (ext4_encrypted_inode(inode)) max_zeroout = 0; - /* If extent is less than s_max_zeroout_kb, zeroout directly */ - if (max_zeroout && (ee_len <= max_zeroout)) { - err = ext4_ext_zeroout(inode, ex); - if (err) - goto out; - zero_ex.ee_block = ex->ee_block; - zero_ex.ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)); - ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex)); - - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; - ext4_ext_mark_initialized(ex); - ext4_ext_try_to_merge(handle, inode, path, ex); - err = ext4_ext_dirty(handle, inode, path + path->p_depth); - goto out; - } - /* - * four cases: + * five cases: * 1. split the extent into three extents. - * 2. split the extent into two extents, zeroout the first half. - * 3. split the extent into two extents, zeroout the second half. + * 2. split the extent into two extents, zeroout the head of the first + * extent. + * 3. split the extent into two extents, zeroout the tail of the second + * extent. * 4. split the extent into two extents with out zeroout. + * 5. no splitting needed, just possibly zeroout the head and / or the + * tail of the extent. */ split_map.m_lblk = map->m_lblk; split_map.m_len = map->m_len; - if (max_zeroout && (allocated > map->m_len)) { + if (max_zeroout && (allocated > split_map.m_len)) { if (allocated <= max_zeroout) { - /* case 3 */ - zero_ex.ee_block = - cpu_to_le32(map->m_lblk); - zero_ex.ee_len = cpu_to_le16(allocated); - ext4_ext_store_pblock(&zero_ex, - ext4_ext_pblock(ex) + map->m_lblk - ee_block); - err = ext4_ext_zeroout(inode, &zero_ex); + /* case 3 or 5 */ + zero_ex1.ee_block = + cpu_to_le32(split_map.m_lblk + + split_map.m_len); + zero_ex1.ee_len = + cpu_to_le16(allocated - split_map.m_len); + ext4_ext_store_pblock(&zero_ex1, + ext4_ext_pblock(ex) + split_map.m_lblk + + split_map.m_len - ee_block); + err = ext4_ext_zeroout(inode, &zero_ex1); if (err) goto out; - split_map.m_lblk = map->m_lblk; split_map.m_len = allocated; - } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) { - /* case 2 */ - if (map->m_lblk != ee_block) { - zero_ex.ee_block = ex->ee_block; - zero_ex.ee_len = cpu_to_le16(map->m_lblk - + } + if (split_map.m_lblk - ee_block + split_map.m_len < + max_zeroout) { + /* case 2 or 5 */ + if (split_map.m_lblk != ee_block) { + zero_ex2.ee_block = ex->ee_block; + zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk - ee_block); - ext4_ext_store_pblock(&zero_ex, + ext4_ext_store_pblock(&zero_ex2, ext4_ext_pblock(ex)); - err = ext4_ext_zeroout(inode, &zero_ex); + err = ext4_ext_zeroout(inode, &zero_ex2); if (err) goto out; } + split_map.m_len += split_map.m_lblk - ee_block; split_map.m_lblk = ee_block; - split_map.m_len = map->m_lblk - ee_block + map->m_len; allocated = map->m_len; } } @@ -3642,8 +3633,11 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, err = 0; out: /* If we have gotten a failure, don't zero out status tree */ - if (!err) - err = ext4_zeroout_es(inode, &zero_ex); + if (!err) { + err = ext4_zeroout_es(inode, &zero_ex1); + if (!err) + err = ext4_zeroout_es(inode, &zero_ex2); + } return err ? err : allocated; } @@ -4883,6 +4877,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, /* Zero out partial block at the edges of the range */ ret = ext4_zero_partial_blocks(handle, inode, offset, len); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); if (file->f_flags & O_SYNC) ext4_handle_sync(handle); @@ -5569,6 +5565,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ext4_handle_sync(handle); inode->i_mtime = inode->i_ctime = current_time(inode); ext4_mark_inode_dirty(handle, inode); + ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: ext4_journal_stop(handle); @@ -5742,6 +5739,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); if (IS_SYNC(inode)) ext4_handle_sync(handle); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: ext4_journal_stop(handle); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 831fd6beebf0..58e2eeaa0bc4 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -37,7 +37,11 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; - inode_lock_shared(inode); + if (!inode_trylock_shared(inode)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_lock_shared(inode); + } /* * Recheck under inode lock - at this point we are sure it cannot * change anymore @@ -179,7 +183,11 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; - inode_lock(inode); + if (!inode_trylock(inode)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_lock(inode); + } ret = ext4_write_checks(iocb, from); if (ret <= 0) goto out; @@ -216,7 +224,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) return ext4_dax_write_iter(iocb, from); #endif - inode_lock(inode); + if (!inode_trylock(inode)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_lock(inode); + } + ret = ext4_write_checks(iocb, from); if (ret <= 0) goto out; @@ -235,9 +248,15 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) iocb->private = &overwrite; /* Check whether we do a DIO overwrite or not */ - if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio && - ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) - overwrite = 1; + if (o_direct && !unaligned_aio) { + if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) { + if (ext4_should_dioread_nolock(inode)) + overwrite = 1; + } else if (iocb->ki_flags & IOCB_NOWAIT) { + ret = -EAGAIN; + goto out; + } + } ret = __generic_file_write_iter(iocb, from); inode_unlock(inode); @@ -435,6 +454,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp) if (ret < 0) return ret; } + + /* Set the flags to support nowait AIO */ + filp->f_mode |= FMODE_AIO_NOWAIT; + return dquot_file_open(inode, filp); } @@ -474,57 +497,37 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, endoff = (loff_t)end_blk << blkbits; index = startoff >> PAGE_SHIFT; - end = endoff >> PAGE_SHIFT; + end = (endoff - 1) >> PAGE_SHIFT; pagevec_init(&pvec, 0); do { int i, num; unsigned long nr_pages; - num = min_t(pgoff_t, end - index, PAGEVEC_SIZE); + num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, (pgoff_t)num); - if (nr_pages == 0) { - if (whence == SEEK_DATA) - break; - - BUG_ON(whence != SEEK_HOLE); - /* - * If this is the first time to go into the loop and - * offset is not beyond the end offset, it will be a - * hole at this offset - */ - if (lastoff == startoff || lastoff < endoff) - found = 1; + if (nr_pages == 0) break; - } - - /* - * If this is the first time to go into the loop and - * offset is smaller than the first page offset, it will be a - * hole at this offset. - */ - if (lastoff == startoff && whence == SEEK_HOLE && - lastoff < page_offset(pvec.pages[0])) { - found = 1; - break; - } for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; struct buffer_head *bh, *head; /* - * If the current offset is not beyond the end of given - * range, it will be a hole. + * If current offset is smaller than the page offset, + * there is a hole at this offset. */ - if (lastoff < endoff && whence == SEEK_HOLE && - page->index > end) { + if (whence == SEEK_HOLE && lastoff < endoff && + lastoff < page_offset(pvec.pages[i])) { found = 1; *offset = lastoff; goto out; } + if (page->index > end) + goto out; + lock_page(page); if (unlikely(page->mapping != inode->i_mapping)) { @@ -564,20 +567,18 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, unlock_page(page); } - /* - * The no. of pages is less than our desired, that would be a - * hole in there. - */ - if (nr_pages < num && whence == SEEK_HOLE) { - found = 1; - *offset = lastoff; + /* The no. of pages is less than our desired, we are done. */ + if (nr_pages < num) break; - } index = pvec.pages[i - 1]->index + 1; pagevec_release(&pvec); } while (index <= end); + if (whence == SEEK_HOLE && lastoff < endoff) { + found = 1; + *offset = lastoff; + } out: pagevec_release(&pvec); return found; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index d5dea4c293ef..8d141c0c8ff9 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1627,7 +1627,6 @@ out: struct buffer_head *ext4_find_inline_entry(struct inode *dir, struct ext4_filename *fname, - const struct qstr *d_name, struct ext4_dir_entry_2 **res_dir, int *has_inline_data) { @@ -1649,7 +1648,7 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir, EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; ret = ext4_search_dir(iloc.bh, inline_start, inline_size, - dir, fname, d_name, 0, res_dir); + dir, fname, 0, res_dir); if (ret == 1) goto out_find; if (ret < 0) @@ -1662,7 +1661,7 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir, inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; ret = ext4_search_dir(iloc.bh, inline_start, inline_size, - dir, fname, d_name, 0, res_dir); + dir, fname, 0, res_dir); if (ret == 1) goto out_find; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1bd0bfa547f6..5cf82d03968c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2124,15 +2124,29 @@ static int ext4_writepage(struct page *page, static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) { int len; - loff_t size = i_size_read(mpd->inode); + loff_t size; int err; BUG_ON(page->index != mpd->first_page); + clear_page_dirty_for_io(page); + /* + * We have to be very careful here! Nothing protects writeback path + * against i_size changes and the page can be writeably mapped into + * page tables. So an application can be growing i_size and writing + * data through mmap while writeback runs. clear_page_dirty_for_io() + * write-protects our page in page tables and the page cannot get + * written to again until we release page lock. So only after + * clear_page_dirty_for_io() we are safe to sample i_size for + * ext4_bio_write_page() to zero-out tail of the written page. We rely + * on the barrier provided by TestClearPageDirty in + * clear_page_dirty_for_io() to make sure i_size is really sampled only + * after page tables are updated. + */ + size = i_size_read(mpd->inode); if (page->index == size >> PAGE_SHIFT) len = size & ~PAGE_MASK; else len = PAGE_SIZE; - clear_page_dirty_for_io(page); err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); if (!err) mpd->wbc->nr_to_write--; @@ -3629,9 +3643,6 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) get_block_func = ext4_dio_get_block_unwritten_async; dio_flags = DIO_LOCKING; } -#ifdef CONFIG_EXT4_FS_ENCRYPTION - BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); -#endif ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, get_block_func, ext4_end_io_dio, NULL, dio_flags); @@ -3713,7 +3724,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter) */ inode_lock_shared(inode); ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, - iocb->ki_pos + count); + iocb->ki_pos + count - 1); if (ret) goto out_unlock; ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, @@ -4207,6 +4218,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) inode->i_mtime = inode->i_ctime = current_time(inode); ext4_mark_inode_dirty(handle, inode); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: ext4_journal_stop(handle); out_dio: @@ -5637,8 +5650,9 @@ static int ext4_expand_extra_isize(struct inode *inode, /* No extended attributes present */ if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { - memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, - new_extra_isize); + memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + + EXT4_I(inode)->i_extra_isize, 0, + new_extra_isize - EXT4_I(inode)->i_extra_isize); EXT4_I(inode)->i_extra_isize = new_extra_isize; return 0; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 5083bce20ac4..b7928cddd539 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3887,7 +3887,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { - ext4_error(sb, "Error loading buddy information for %u", group); + ext4_warning(sb, "Error %d loading buddy information for %u", + err, group); put_bh(bitmap_bh); return 0; } @@ -4044,10 +4045,11 @@ repeat: BUG_ON(pa->pa_type != MB_INODE_PA); group = ext4_get_group_number(sb, pa->pa_pstart); - err = ext4_mb_load_buddy(sb, group, &e4b); + err = ext4_mb_load_buddy_gfp(sb, group, &e4b, + GFP_NOFS|__GFP_NOFAIL); if (err) { - ext4_error(sb, "Error loading buddy information for %u", - group); + ext4_error(sb, "Error %d loading buddy information for %u", + err, group); continue; } @@ -4303,11 +4305,14 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, spin_unlock(&lg->lg_prealloc_lock); list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) { + int err; group = ext4_get_group_number(sb, pa->pa_pstart); - if (ext4_mb_load_buddy(sb, group, &e4b)) { - ext4_error(sb, "Error loading buddy information for %u", - group); + err = ext4_mb_load_buddy_gfp(sb, group, &e4b, + GFP_NOFS|__GFP_NOFAIL); + if (err) { + ext4_error(sb, "Error %d loading buddy information for %u", + err, group); continue; } ext4_lock_group(sb, group); @@ -5127,8 +5132,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ret = ext4_mb_load_buddy(sb, group, &e4b); if (ret) { - ext4_error(sb, "Error in loading buddy " - "information for %u", group); + ext4_warning(sb, "Error %d loading buddy information for %u", + ret, group); return ret; } bitmap = e4b.bd_bitmap; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b81f7d46f344..404256caf9cf 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1155,12 +1155,11 @@ errout: static inline int search_dirblock(struct buffer_head *bh, struct inode *dir, struct ext4_filename *fname, - const struct qstr *d_name, unsigned int offset, struct ext4_dir_entry_2 **res_dir) { return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir, - fname, d_name, offset, res_dir); + fname, offset, res_dir); } /* @@ -1262,7 +1261,6 @@ static inline bool ext4_match(const struct ext4_filename *fname, */ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, struct inode *dir, struct ext4_filename *fname, - const struct qstr *d_name, unsigned int offset, struct ext4_dir_entry_2 **res_dir) { struct ext4_dir_entry_2 * de; @@ -1355,7 +1353,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, if (ext4_has_inline_data(dir)) { int has_inline_data = 1; - ret = ext4_find_inline_entry(dir, &fname, d_name, res_dir, + ret = ext4_find_inline_entry(dir, &fname, res_dir, &has_inline_data); if (has_inline_data) { if (inlined) @@ -1447,7 +1445,7 @@ restart: goto next; } set_buffer_verified(bh); - i = search_dirblock(bh, dir, &fname, d_name, + i = search_dirblock(bh, dir, &fname, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (i == 1) { EXT4_I(dir)->i_dir_start_lookup = block; @@ -1488,7 +1486,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, { struct super_block * sb = dir->i_sb; struct dx_frame frames[2], *frame; - const struct qstr *d_name = fname->usr_fname; struct buffer_head *bh; ext4_lblk_t block; int retval; @@ -1505,7 +1502,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, if (IS_ERR(bh)) goto errout; - retval = search_dirblock(bh, dir, fname, d_name, + retval = search_dirblock(bh, dir, fname, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (retval == 1) @@ -1530,7 +1527,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, bh = NULL; errout: - dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); + dxtrace(printk(KERN_DEBUG "%s not found\n", fname->usr_fname->name)); success: dx_release(frames); return bh; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 1a82138ba739..c2fce4478cca 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -85,7 +85,7 @@ static void ext4_finish_bio(struct bio *bio) } #endif - if (bio->bi_error) { + if (bio->bi_status) { SetPageError(page); mapping_set_error(page->mapping, -EIO); } @@ -104,7 +104,7 @@ static void ext4_finish_bio(struct bio *bio) continue; } clear_buffer_async_write(bh); - if (bio->bi_error) + if (bio->bi_status) buffer_io_error(bh); } while ((bh = bh->b_this_page) != head); bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); @@ -303,24 +303,25 @@ static void ext4_end_bio(struct bio *bio) bdevname(bio->bi_bdev, b), (long long) bio->bi_iter.bi_sector, (unsigned) bio_sectors(bio), - bio->bi_error)) { + bio->bi_status)) { ext4_finish_bio(bio); bio_put(bio); return; } bio->bi_end_io = NULL; - if (bio->bi_error) { + if (bio->bi_status) { struct inode *inode = io_end->inode; ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu " "(offset %llu size %ld starting block %llu)", - bio->bi_error, inode->i_ino, + bio->bi_status, inode->i_ino, (unsigned long long) io_end->offset, (long) io_end->size, (unsigned long long) bi_sector >> (inode->i_blkbits - 9)); - mapping_set_error(inode->i_mapping, bio->bi_error); + mapping_set_error(inode->i_mapping, + blk_status_to_errno(bio->bi_status)); } if (io_end->flag & EXT4_IO_END_UNWRITTEN) { @@ -349,6 +350,7 @@ void ext4_io_submit(struct ext4_io_submit *io) if (bio) { int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0; + io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint; bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags); submit_bio(io->io_bio); } @@ -396,6 +398,7 @@ submit_and_retry: ret = io_submit_init_bio(io, bh); if (ret) return ret; + io->io_bio->bi_write_hint = inode->i_write_hint; } ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); if (ret != bh->b_size) diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index a81b829d56de..40a5497b0f60 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -73,7 +73,7 @@ static void mpage_end_io(struct bio *bio) int i; if (ext4_bio_encrypted(bio)) { - if (bio->bi_error) { + if (bio->bi_status) { fscrypt_release_ctx(bio->bi_private); } else { fscrypt_decrypt_bio_pages(bio->bi_private, bio); @@ -83,7 +83,7 @@ static void mpage_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - if (!bio->bi_error) { + if (!bio->bi_status) { SetPageUptodate(page); } else { ClearPageUptodate(page); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0b177da9ea82..9006cb5857b8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -848,14 +848,9 @@ static inline void ext4_quota_off_umount(struct super_block *sb) { int type; - if (ext4_has_feature_quota(sb)) { - dquot_disable(sb, -1, - DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); - } else { - /* Use our quota_off function to clear inode flags etc. */ - for (type = 0; type < EXT4_MAXQUOTAS; type++) - ext4_quota_off(sb, type); - } + /* Use our quota_off function to clear inode flags etc. */ + for (type = 0; type < EXT4_MAXQUOTAS; type++) + ext4_quota_off(sb, type); } #else static inline void ext4_quota_off_umount(struct super_block *sb) @@ -1179,6 +1174,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, return res; } + res = dquot_initialize(inode); + if (res) + return res; retry: handle = ext4_journal_start(inode, EXT4_HT_MISC, ext4_jbd2_credits_xattr(inode)); @@ -3952,7 +3950,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_qcop = &ext4_qctl_operations; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; #endif - memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); + memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); @@ -5485,7 +5483,7 @@ static int ext4_quota_off(struct super_block *sb, int type) goto out; err = dquot_quota_off(sb, type); - if (err) + if (err || ext4_has_feature_quota(sb)) goto out_put; inode_lock(inode); @@ -5505,6 +5503,7 @@ static int ext4_quota_off(struct super_block *sb, int type) out_unlock: inode_unlock(inode); out_put: + lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL); iput(inode); return err; out: diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 8fb7ce14e6eb..5d3c2536641c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -888,6 +888,8 @@ inserted: else { u32 ref; + WARN_ON_ONCE(dquot_initialize_needed(inode)); + /* The old block is released after updating the inode. */ error = dquot_alloc_block(inode, @@ -954,6 +956,8 @@ inserted: /* We need to allocate a new block */ ext4_fsblk_t goal, block; + WARN_ON_ONCE(dquot_initialize_needed(inode)); + goal = ext4_group_first_block_no(sb, EXT4_I(inode)->i_block_group); @@ -1166,6 +1170,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, return -EINVAL; if (strlen(name) > 255) return -ERANGE; + ext4_write_lock_xattr(inode, &no_expand); error = ext4_reserve_inode_write(handle, inode, &is.iloc); @@ -1267,6 +1272,9 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name, int error, retries = 0; int credits = ext4_jbd2_credits_xattr(inode); + error = dquot_initialize(inode); + if (error) + return error; retry: handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); if (IS_ERR(handle)) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7c0f6bdf817d..36fe82012a33 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -58,12 +58,12 @@ static void f2fs_read_end_io(struct bio *bio) #ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { f2fs_show_injection_info(FAULT_IO); - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; } #endif if (f2fs_bio_encrypted(bio)) { - if (bio->bi_error) { + if (bio->bi_status) { fscrypt_release_ctx(bio->bi_private); } else { fscrypt_decrypt_bio_pages(bio->bi_private, bio); @@ -74,7 +74,7 @@ static void f2fs_read_end_io(struct bio *bio) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; - if (!bio->bi_error) { + if (!bio->bi_status) { if (!PageUptodate(page)) SetPageUptodate(page); } else { @@ -102,14 +102,14 @@ static void f2fs_write_end_io(struct bio *bio) unlock_page(page); mempool_free(page, sbi->write_io_dummy); - if (unlikely(bio->bi_error)) + if (unlikely(bio->bi_status)) f2fs_stop_checkpoint(sbi, true); continue; } fscrypt_pullback_bio_page(&page, true); - if (unlikely(bio->bi_error)) { + if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); f2fs_stop_checkpoint(sbi, true); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2185c7a040a1..fd2e651bad6d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1078,6 +1078,7 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, { SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver); u32 *ctx = (u32 *)shash_desc_ctx(shash); + u32 retval; int err; shash->tfm = sbi->s_chksum_driver; @@ -1087,7 +1088,9 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, err = crypto_shash_update(shash, address, length); BUG_ON(err); - return *ctx; + retval = *ctx; + barrier_data(ctx); + return retval; } static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 96845854e7ee..ea9f455d94ba 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -749,7 +749,7 @@ static void f2fs_submit_discard_endio(struct bio *bio) { struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; - dc->error = bio->bi_error; + dc->error = blk_status_to_errno(bio->bi_status); dc->state = D_DONE; complete(&dc->wait); bio_put(bio); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 83355ec4a92c..0b89b0b7b9f7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1937,7 +1937,7 @@ try_onemore: sb->s_time_gran = 1; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); - memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); + memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; diff --git a/fs/fcntl.c b/fs/fcntl.c index f4e7267d117f..ed051f825bad 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -243,6 +243,67 @@ static int f_getowner_uids(struct file *filp, unsigned long arg) } #endif +static bool rw_hint_valid(enum rw_hint hint) +{ + switch (hint) { + case RWF_WRITE_LIFE_NOT_SET: + case RWH_WRITE_LIFE_NONE: + case RWH_WRITE_LIFE_SHORT: + case RWH_WRITE_LIFE_MEDIUM: + case RWH_WRITE_LIFE_LONG: + case RWH_WRITE_LIFE_EXTREME: + return true; + default: + return false; + } +} + +static long fcntl_rw_hint(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inode *inode = file_inode(file); + u64 *argp = (u64 __user *)arg; + enum rw_hint hint; + u64 h; + + switch (cmd) { + case F_GET_FILE_RW_HINT: + h = file_write_hint(file); + if (copy_to_user(argp, &h, sizeof(*argp))) + return -EFAULT; + return 0; + case F_SET_FILE_RW_HINT: + if (copy_from_user(&h, argp, sizeof(h))) + return -EFAULT; + hint = (enum rw_hint) h; + if (!rw_hint_valid(hint)) + return -EINVAL; + + spin_lock(&file->f_lock); + file->f_write_hint = hint; + spin_unlock(&file->f_lock); + return 0; + case F_GET_RW_HINT: + h = inode->i_write_hint; + if (copy_to_user(argp, &h, sizeof(*argp))) + return -EFAULT; + return 0; + case F_SET_RW_HINT: + if (copy_from_user(&h, argp, sizeof(h))) + return -EFAULT; + hint = (enum rw_hint) h; + if (!rw_hint_valid(hint)) + return -EINVAL; + + inode_lock(inode); + inode->i_write_hint = hint; + inode_unlock(inode); + return 0; + default: + return -EINVAL; + } +} + static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { @@ -337,6 +398,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_GET_SEALS: err = shmem_fcntl(filp, cmd, arg); break; + case F_GET_RW_HINT: + case F_SET_RW_HINT: + case F_GET_FILE_RW_HINT: + case F_SET_FILE_RW_HINT: + err = fcntl_rw_hint(filp, cmd, arg); + break; default: break; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 63ee2940775c..8b426f83909f 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2052,11 +2052,13 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) } /** - * __mark_inode_dirty - internal function - * @inode: inode to mark - * @flags: what kind of dirty (i.e. I_DIRTY_SYNC) - * Mark an inode as dirty. Callers should use mark_inode_dirty or - * mark_inode_dirty_sync. + * __mark_inode_dirty - internal function + * + * @inode: inode to mark + * @flags: what kind of dirty (i.e. I_DIRTY_SYNC) + * + * Mark an inode as dirty. Callers should use mark_inode_dirty or + * mark_inode_dirty_sync. * * Put the inode on the super block's dirty list. * diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 611b5408f6ec..e747b3d720ee 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -34,7 +34,7 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m) void pin_kill(struct fs_pin *p) { - wait_queue_t wait; + wait_queue_entry_t wait; if (!p) { rcu_read_unlock(); @@ -61,7 +61,7 @@ void pin_kill(struct fs_pin *p) rcu_read_unlock(); schedule(); rcu_read_lock(); - if (likely(list_empty(&wait.task_list))) + if (likely(list_empty(&wait.entry))) break; /* OK, we know p couldn't have been freed yet */ spin_lock_irq(&p->wait.lock); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index b7cf65d13561..aa3d44527fa2 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -815,7 +815,6 @@ struct gfs2_sbd { atomic_t sd_log_in_flight; struct bio *sd_log_bio; wait_queue_head_t sd_log_flush_wait; - int sd_log_error; atomic_t sd_reserving_log; wait_queue_head_t sd_reserving_log_wait; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index b1f9144b42c7..885d36e7a29f 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -170,7 +170,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp) */ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec, - int error) + blk_status_t error) { struct buffer_head *bh, *next; struct page *page = bvec->bv_page; @@ -209,15 +209,13 @@ static void gfs2_end_log_write(struct bio *bio) struct page *page; int i; - if (bio->bi_error) { - sdp->sd_log_error = bio->bi_error; - fs_err(sdp, "Error %d writing to log\n", bio->bi_error); - } + if (bio->bi_status) + fs_err(sdp, "Error %d writing to log\n", bio->bi_status); bio_for_each_segment_all(bvec, bio, i) { page = bvec->bv_page; if (page_has_buffers(page)) - gfs2_end_log_write_bh(sdp, bvec, bio->bi_error); + gfs2_end_log_write_bh(sdp, bvec, bio->bi_status); else mempool_free(page, gfs2_page_pool); } diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 663ffc135ef3..fabe1614f879 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -201,7 +201,7 @@ static void gfs2_meta_read_endio(struct bio *bio) do { struct buffer_head *next = bh->b_this_page; len -= bh->b_size; - bh->b_end_io(bh, !bio->bi_error); + bh->b_end_io(bh, !bio->bi_status); bh = next; } while (bh && len); } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ed67548b286c..e76058d34b74 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -176,10 +176,10 @@ static void end_bio_io_page(struct bio *bio) { struct page *page = bio->bi_private; - if (!bio->bi_error) + if (!bio->bi_status) SetPageUptodate(page); else - pr_warn("error %d reading superblock\n", bio->bi_error); + pr_warn("error %d reading superblock\n", bio->bi_status); unlock_page(page); } @@ -203,7 +203,7 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf) memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); - memcpy(s->s_uuid, str->sb_uuid, 16); + memcpy(&s->s_uuid, str->sb_uuid, 16); } /** diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 7a515345610c..e77bc52b468f 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -71,25 +71,14 @@ static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname); } -static int gfs2_uuid_valid(const u8 *uuid) -{ - int i; - - for (i = 0; i < 16; i++) { - if (uuid[i]) - return 1; - } - return 0; -} - static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) { struct super_block *s = sdp->sd_vfs; - const u8 *uuid = s->s_uuid; + buf[0] = '\0'; - if (!gfs2_uuid_valid(uuid)) + if (uuid_is_null(&s->s_uuid)) return 0; - return snprintf(buf, PAGE_SIZE, "%pUB\n", uuid); + return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid); } static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) @@ -712,14 +701,13 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, { struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); struct super_block *s = sdp->sd_vfs; - const u8 *uuid = s->s_uuid; add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid); - if (gfs2_uuid_valid(uuid)) - add_uevent_var(env, "UUID=%pUB", uuid); + if (!uuid_is_null(&s->s_uuid)) + add_uevent_var(env, "UUID=%pUB", &s->s_uuid); return 0; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index dde861387a40..d44f5456eb9b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -200,7 +200,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/fs/inode.c b/fs/inode.c index db5914783a71..ab3b9a795c0b 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -146,6 +146,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) i_gid_write(inode, 0); atomic_set(&inode->i_writecount, 0); inode->i_size = 0; + inode->i_write_hint = WRITE_LIFE_NOT_SET; inode->i_blocks = 0; inode->i_bytes = 0; inode->i_generation = 0; @@ -1891,11 +1892,11 @@ static void __wait_on_freeing_inode(struct inode *inode) wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); wq = bit_waitqueue(&inode->i_state, __I_NEW); - prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); schedule(); - finish_wait(wq, &wait.wait); + finish_wait(wq, &wait.wq_entry); spin_lock(&inode_hash_lock); } @@ -2038,11 +2039,11 @@ static void __inode_dio_wait(struct inode *inode) DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); do { - prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE); if (atomic_read(&inode->i_dio_count)) schedule(); } while (atomic_read(&inode->i_dio_count)); - finish_wait(wq, &q.wait); + finish_wait(wq, &q.wq_entry); } /** diff --git a/fs/iomap.c b/fs/iomap.c index 4b10892967a5..fa6cd5b3f578 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -672,8 +672,8 @@ static void iomap_dio_bio_end_io(struct bio *bio) struct iomap_dio *dio = bio->bi_private; bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY); - if (bio->bi_error) - iomap_dio_set_error(dio, bio->bi_error); + if (bio->bi_status) + iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status)); if (atomic_dec_and_test(&dio->ref)) { if (is_sync_kiocb(dio->iocb)) { @@ -793,6 +793,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, bio->bi_bdev = iomap->bdev; bio->bi_iter.bi_sector = iomap->blkno + ((pos - iomap->offset) >> 9); + bio->bi_write_hint = dio->iocb->ki_hint; bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; @@ -881,6 +882,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, flags |= IOMAP_WRITE; } + if (iocb->ki_flags & IOCB_NOWAIT) { + if (filemap_range_has_page(mapping, start, end)) { + ret = -EAGAIN; + goto out_free_dio; + } + flags |= IOMAP_NOWAIT; + } + ret = filemap_write_and_wait_range(mapping, start, end); if (ret) goto out_free_dio; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ebad34266bcf..7d5ef3bf3f3e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2579,10 +2579,10 @@ restart: wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); - prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&journal->j_list_lock); schedule(); - finish_wait(wq, &wait.wait); + finish_wait(wq, &wait.wq_entry); goto restart; } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 9ee4832b6f8b..8b08044b3120 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -409,25 +409,6 @@ static handle_t *new_handle(int nblocks) return handle; } -/** - * handle_t *jbd2_journal_start() - Obtain a new handle. - * @journal: Journal to start transaction on. - * @nblocks: number of block buffer we might modify - * - * We make sure that the transaction can guarantee at least nblocks of - * modified buffers in the log. We block until the log can guarantee - * that much space. Additionally, if rsv_blocks > 0, we also create another - * handle with rsv_blocks reserved blocks in the journal. This handle is - * is stored in h_rsv_handle. It is not attached to any particular transaction - * and thus doesn't block transaction commit. If the caller uses this reserved - * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop() - * on the parent handle will dispose the reserved one. Reserved handle has to - * be converted to a normal handle using jbd2_journal_start_reserved() before - * it can be used. - * - * Return a pointer to a newly allocated handle, or an ERR_PTR() value - * on failure. - */ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks, gfp_t gfp_mask, unsigned int type, unsigned int line_no) @@ -478,6 +459,25 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks, EXPORT_SYMBOL(jbd2__journal_start); +/** + * handle_t *jbd2_journal_start() - Obtain a new handle. + * @journal: Journal to start transaction on. + * @nblocks: number of block buffer we might modify + * + * We make sure that the transaction can guarantee at least nblocks of + * modified buffers in the log. We block until the log can guarantee + * that much space. Additionally, if rsv_blocks > 0, we also create another + * handle with rsv_blocks reserved blocks in the journal. This handle is + * is stored in h_rsv_handle. It is not attached to any particular transaction + * and thus doesn't block transaction commit. If the caller uses this reserved + * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop() + * on the parent handle will dispose the reserved one. Reserved handle has to + * be converted to a normal handle using jbd2_journal_start_reserved() before + * it can be used. + * + * Return a pointer to a newly allocated handle, or an ERR_PTR() value + * on failure. + */ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) { return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0); @@ -680,6 +680,12 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_); handle->h_buffer_credits = nblocks; + /* + * Restore the original nofs context because the journal restart + * is basically the same thing as journal stop and start. + * start_this_handle will start a new nofs context. + */ + memalloc_nofs_restore(handle->saved_alloc_context); ret = start_this_handle(journal, handle, gfp_mask); return ret; } @@ -1066,10 +1072,10 @@ out: * @handle: transaction to add buffer modifications to * @bh: bh to be used for metadata writes * - * Returns an error code or 0 on success. + * Returns: error code or 0 on success. * * In full data journalling mode the buffer may be of type BJ_AsyncData, - * because we're write()ing a buffer which is also part of a shared mapping. + * because we're ``write()ing`` a buffer which is also part of a shared mapping. */ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index bb1da1feafeb..a21f0e9eecd4 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2205,7 +2205,7 @@ static void lbmIODone(struct bio *bio) bp->l_flag |= lbmDONE; - if (bio->bi_error) { + if (bio->bi_status) { bp->l_flag |= lbmERROR; jfs_err("lbmIODone: I/O error in JFS log"); diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 489aaa1403e5..ce93db3aef3c 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -280,7 +280,7 @@ static void metapage_read_end_io(struct bio *bio) { struct page *page = bio->bi_private; - if (bio->bi_error) { + if (bio->bi_status) { printk(KERN_ERR "metapage_read_end_io: I/O error\n"); SetPageError(page); } @@ -337,7 +337,7 @@ static void metapage_write_end_io(struct bio *bio) BUG_ON(!PagePrivate(page)); - if (bio->bi_error) { + if (bio->bi_status) { printk(KERN_ERR "metapage_write_end_io: I/O error\n"); SetPageError(page); } diff --git a/fs/mpage.c b/fs/mpage.c index baff8f820c29..2e4c41ccb5c9 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -50,7 +50,8 @@ static void mpage_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - page_endio(page, op_is_write(bio_op(bio)), bio->bi_error); + page_endio(page, op_is_write(bio_op(bio)), + blk_status_to_errno(bio->bi_status)); } bio_put(bio); @@ -344,6 +345,7 @@ confused: * * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be * submitted in the following order: + * * 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16 * * because the indirect block has to be read to get the mappings of blocks @@ -614,6 +616,7 @@ alloc_new: goto confused; wbc_init_bio(wbc, bio); + bio->bi_write_hint = inode->i_write_hint; } /* diff --git a/fs/namei.c b/fs/namei.c index 6571a5f5112e..8bacc390c51e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4332,6 +4332,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * The worst of all namespace operations - renaming directory. "Perverted" * doesn't even start to describe it. Somebody in UCB had a heck of a trip... * Problems: + * * a) we can get into loop creation. * b) race potential - two innocent renames can create a loop together. * That's where 4.4 screws up. Current fix: serialization on diff --git a/fs/namespace.c b/fs/namespace.c index 8bd3e4d448b9..5a4438445bf7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3488,6 +3488,8 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns) return err; } + put_mnt_ns(old_mnt_ns); + /* Update the pwd and root */ set_fs_pwd(fs, &root); set_fs_root(fs, &root); diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 0ca370d23ddb..d8863a804b15 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -188,7 +188,7 @@ static void bl_end_io_read(struct bio *bio) { struct parallel_io *par = bio->bi_private; - if (bio->bi_error) { + if (bio->bi_status) { struct nfs_pgio_header *header = par->data; if (!header->pnfs_error) @@ -319,7 +319,7 @@ static void bl_end_io_write(struct bio *bio) struct parallel_io *par = bio->bi_private; struct nfs_pgio_header *header = par->data; - if (bio->bi_error) { + if (bio->bi_status) { if (!header->pnfs_error) header->pnfs_error = -EIO; pnfs_set_lo_fail(header->lseg); diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c14758e08d73..390ac9c39c59 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -753,7 +753,6 @@ static void nfs4_callback_free_slot(struct nfs4_session *session, * A single slot, so highest used slotid is either 0 or -1 */ nfs4_free_slot(tbl, slot); - nfs4_slot_tbl_drain_complete(tbl); spin_unlock(&tbl->slot_tbl_lock); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 32ccd7754f8a..2ac00bf4ecf1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1946,29 +1946,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) } EXPORT_SYMBOL_GPL(nfs_link); -static void -nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data) -{ - struct dentry *old_dentry = data->old_dentry; - struct dentry *new_dentry = data->new_dentry; - struct inode *old_inode = d_inode(old_dentry); - struct inode *new_inode = d_inode(new_dentry); - - nfs_mark_for_revalidate(old_inode); - - switch (task->tk_status) { - case 0: - if (new_inode != NULL) - nfs_drop_nlink(new_inode); - d_move(old_dentry, new_dentry); - nfs_set_verifier(new_dentry, - nfs_save_change_attribute(data->new_dir)); - break; - case -ENOENT: - nfs_dentry_handle_enoent(old_dentry); - } -} - /* * RENAME * FIXME: Some nfsds, like the Linux user space nfsd, may generate a @@ -1999,7 +1976,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, { struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); - struct dentry *dentry = NULL; + struct dentry *dentry = NULL, *rehash = NULL; struct rpc_task *task; int error = -EBUSY; @@ -2022,8 +1999,10 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, * To prevent any new references to the target during the * rename, we unhash the dentry in advance. */ - if (!d_unhashed(new_dentry)) + if (!d_unhashed(new_dentry)) { d_drop(new_dentry); + rehash = new_dentry; + } if (d_count(new_dentry) > 2) { int err; @@ -2040,6 +2019,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; new_dentry = dentry; + rehash = NULL; new_inode = NULL; } } @@ -2048,8 +2028,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode != NULL) NFS_PROTO(new_inode)->return_delegation(new_inode); - task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, - nfs_complete_rename); + task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); if (IS_ERR(task)) { error = PTR_ERR(task); goto out; @@ -2059,9 +2038,27 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (error == 0) error = task->tk_status; rpc_put_task(task); + nfs_mark_for_revalidate(old_inode); out: + if (rehash) + d_rehash(rehash); trace_nfs_rename_exit(old_dir, old_dentry, new_dir, new_dentry, error); + if (!error) { + if (new_inode != NULL) + nfs_drop_nlink(new_inode); + /* + * The d_move() should be here instead of in an async RPC completion + * handler because we need the proper locks to move the dentry. If + * we're interrupted by a signal, the async RPC completion handler + * should mark the directories for revalidation. + */ + d_move(old_dentry, new_dentry); + nfs_set_verifier(new_dentry, + nfs_save_change_attribute(new_dir)); + } else if (error == -ENOENT) + nfs_dentry_handle_enoent(old_dentry); + /* new dentry created? */ if (dentry) dput(dentry); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3e24392f2caa..8701d7617964 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -7,6 +7,7 @@ #include <linux/security.h> #include <linux/crc32.h> #include <linux/nfs_page.h> +#include <linux/wait_bit.h> #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c08c46a3b8cd..98b0b662af09 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2589,7 +2589,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, /* Except MODE, it seems harmless of setting twice. */ if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE && - attrset[1] & FATTR4_WORD1_MODE) + (attrset[1] & FATTR4_WORD1_MODE || + attrset[2] & FATTR4_WORD2_MODE_UMASK)) sattr->ia_valid &= ~ATTR_MODE; if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL) @@ -6372,7 +6373,7 @@ struct nfs4_lock_waiter { }; static int -nfs4_wake_lock_waiter(wait_queue_t *wait, unsigned int mode, int flags, void *key) +nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key) { int ret; struct cb_notify_lock_args *cbnl = key; @@ -6415,7 +6416,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) .inode = state->inode, .owner = &owner, .notified = false }; - wait_queue_t wait; + wait_queue_entry_t wait; /* Don't bother with waitqueue if we don't expect a callback */ if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags)) @@ -8416,6 +8417,7 @@ static void nfs4_layoutget_release(void *calldata) size_t max_pages = max_response_pages(server); dprintk("--> %s\n", __func__); + nfs4_sequence_free_slot(&lgp->res.seq_res); nfs4_free_pages(lgp->args.layout.pages, max_pages); pnfs_put_layout_hdr(NFS_I(inode)->layout); put_nfs_open_context(lgp->args.ctx); @@ -8490,7 +8492,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ if (status == 0 && lgp->res.layoutp->len) lseg = pnfs_layout_process(lgp); - nfs4_sequence_free_slot(&lgp->res.seq_res); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); if (status) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b34de036501b..cbf82b0d4467 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2134,6 +2134,8 @@ again: put_rpccred(cred); switch (status) { case 0: + case -EINTR: + case -ERESTARTSYS: break; case -ETIMEDOUT: if (clnt->cl_softrtry) diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index fb5213afc854..c862c2489df0 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -219,6 +219,9 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev, u8 *buf, *d, type, assoc; int error; + if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q))) + return -EINVAL; + buf = kzalloc(bufflen, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -229,7 +232,6 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev, goto out_free_buf; } req = scsi_req(rq); - scsi_req_init(rq); error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); if (error) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index e71f11b1a180..3bc08c394a3f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -486,7 +486,7 @@ secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } #endif static inline int -uuid_parse(char **mesg, char *buf, unsigned char **puuid) +nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid) { int len; @@ -586,7 +586,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) if (strcmp(buf, "fsloc") == 0) err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); else if (strcmp(buf, "uuid") == 0) - err = uuid_parse(&mesg, buf, &exp.ex_uuid); + err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid); else if (strcmp(buf, "secinfo") == 0) err = secinfo_parse(&mesg, buf, &exp); else diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 6f87b2ac1aeb..e73c86d9855c 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -338,7 +338,7 @@ static void nilfs_end_bio_write(struct bio *bio) { struct nilfs_segment_buffer *segbuf = bio->bi_private; - if (bio->bi_error) + if (bio->bi_status) atomic_inc(&segbuf->sb_err); bio_put(bio); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index febed1217b3f..70ded52dc1dd 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2161,7 +2161,7 @@ void nilfs_flush_segment(struct super_block *sb, ino_t ino) } struct nilfs_segctor_wait_request { - wait_queue_t wq; + wait_queue_entry_t wq; __u32 seq; int err; atomic_t done; @@ -2206,8 +2206,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) unsigned long flags; spin_lock_irqsave(&sci->sc_wait_request.lock, flags); - list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list, - wq.task_list) { + list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) { if (!atomic_read(&wrq->done) && nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) { wrq->err = err; diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 0da0332725aa..ffe003982d95 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -516,9 +516,9 @@ static void o2hb_bio_end_io(struct bio *bio) { struct o2hb_bio_wait_ctxt *wc = bio->bi_private; - if (bio->bi_error) { - mlog(ML_ERROR, "IO Error %d\n", bio->bi_error); - wc->wc_error = bio->bi_error; + if (bio->bi_status) { + mlog(ML_ERROR, "IO Error %d\n", bio->bi_status); + wc->wc_error = blk_status_to_errno(bio->bi_status); } o2hb_bio_wait_dec(wc, 1); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 3b7c937a36b5..4689940a953c 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2591,6 +2591,10 @@ void ocfs2_inode_unlock_tracker(struct inode *inode, struct ocfs2_lock_res *lockres; lockres = &OCFS2_I(inode)->ip_inode_lockres; + /* had_lock means that the currect process already takes the cluster + * lock previously. If had_lock is 1, we have nothing to do here, and + * it will get unlocked where we got the lock. + */ if (!had_lock) { ocfs2_remove_holder(lockres, oh); ocfs2_inode_unlock(inode, ex); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index ca1646fbcaef..83005f486451 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2062,7 +2062,7 @@ static int ocfs2_initialize_super(struct super_block *sb, cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); - memcpy(sb->s_uuid, di->id2.i_super.s_uuid, + memcpy(&sb->s_uuid, di->id2.i_super.s_uuid, sizeof(di->id2.i_super.s_uuid)); osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3c5384d9b3a5..f70c3778d600 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1328,20 +1328,21 @@ static int ocfs2_xattr_get(struct inode *inode, void *buffer, size_t buffer_size) { - int ret; + int ret, had_lock; struct buffer_head *di_bh = NULL; + struct ocfs2_lock_holder oh; - ret = ocfs2_inode_lock(inode, &di_bh, 0); - if (ret < 0) { - mlog_errno(ret); - return ret; + had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh); + if (had_lock < 0) { + mlog_errno(had_lock); + return had_lock; } down_read(&OCFS2_I(inode)->ip_xattr_sem); ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, name, buffer, buffer_size); up_read(&OCFS2_I(inode)->ip_xattr_sem); - ocfs2_inode_unlock(inode, 0); + ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); brelse(di_bh); @@ -3537,11 +3538,12 @@ int ocfs2_xattr_set(struct inode *inode, { struct buffer_head *di_bh = NULL; struct ocfs2_dinode *di; - int ret, credits, ref_meta = 0, ref_credits = 0; + int ret, credits, had_lock, ref_meta = 0, ref_credits = 0; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct inode *tl_inode = osb->osb_tl_inode; struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; struct ocfs2_refcount_tree *ref_tree = NULL; + struct ocfs2_lock_holder oh; struct ocfs2_xattr_info xi = { .xi_name_index = name_index, @@ -3572,8 +3574,9 @@ int ocfs2_xattr_set(struct inode *inode, return -ENOMEM; } - ret = ocfs2_inode_lock(inode, &di_bh, 1); - if (ret < 0) { + had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh); + if (had_lock < 0) { + ret = had_lock; mlog_errno(ret); goto cleanup_nolock; } @@ -3670,7 +3673,7 @@ cleanup: if (ret) mlog_errno(ret); } - ocfs2_inode_unlock(inode, 1); + ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); cleanup_nolock: brelse(di_bh); brelse(xbs.xattr_bh); diff --git a/fs/open.c b/fs/open.c index cd0c5be8d012..3fe0c4aa7d27 100644 --- a/fs/open.c +++ b/fs/open.c @@ -759,6 +759,7 @@ static int do_dentry_open(struct file *f, likely(f->f_op->write || f->f_op->write_iter)) f->f_mode |= FMODE_CAN_WRITE; + f->f_write_hint = WRITE_LIFE_NOT_SET; f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 83b506020718..038d67545d9f 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -46,8 +46,8 @@ static void run_down(struct slot_map *m) spin_lock(&m->q.lock); if (m->c != -1) { for (;;) { - if (likely(list_empty(&wait.task_list))) - __add_wait_queue_tail(&m->q, &wait); + if (likely(list_empty(&wait.entry))) + __add_wait_queue_entry_tail(&m->q, &wait); set_current_state(TASK_UNINTERRUPTIBLE); if (m->c == -1) @@ -84,8 +84,8 @@ static int wait_for_free(struct slot_map *m) do { long n = left, t; - if (likely(list_empty(&wait.task_list))) - __add_wait_queue_tail_exclusive(&m->q, &wait); + if (likely(list_empty(&wait.entry))) + __add_wait_queue_entry_tail_exclusive(&m->q, &wait); set_current_state(TASK_INTERRUPTIBLE); if (m->c > 0) @@ -108,8 +108,8 @@ static int wait_for_free(struct slot_map *m) left = -EINTR; } while (left > 0); - if (!list_empty(&wait.task_list)) - list_del(&wait.task_list); + if (!list_empty(&wait.entry)) + list_del(&wait.entry); else if (left <= 0 && waitqueue_active(&m->q)) __wake_up_locked_key(&m->q, TASK_INTERRUPTIBLE, NULL); __set_current_state(TASK_RUNNING); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 7a44533f4bbf..e5869f91b3ab 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -233,7 +233,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) return err; } -static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_be *uuid) +static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid) { struct ovl_fh *fh; int fh_type, fh_len, dwords; @@ -284,7 +284,6 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, struct dentry *upper) { struct super_block *sb = lower->d_sb; - uuid_be *uuid = (uuid_be *) &sb->s_uuid; const struct ovl_fh *fh = NULL; int err; @@ -294,8 +293,8 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, * up and a pure upper inode. */ if (sb->s_export_op && sb->s_export_op->fh_to_dentry && - uuid_be_cmp(*uuid, NULL_UUID_BE)) { - fh = ovl_encode_fh(lower, uuid); + !uuid_is_null(&sb->s_uuid)) { + fh = ovl_encode_fh(lower, &sb->s_uuid); if (IS_ERR(fh)) return PTR_ERR(fh); } @@ -330,15 +329,9 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, .link = link }; - upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); - err = PTR_ERR(upper); - if (IS_ERR(upper)) - goto out; - err = security_inode_copy_up(dentry, &new_creds); if (err < 0) - goto out1; + goto out; if (new_creds) old_creds = override_creds(new_creds); @@ -362,7 +355,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, } if (err) - goto out2; + goto out; if (S_ISREG(stat->mode)) { struct path upperpath; @@ -398,10 +391,23 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, /* * Store identifier of lower inode in upper inode xattr to * allow lookup of the copy up origin inode. + * + * Don't set origin when we are breaking the association with a lower + * hard link. */ - err = ovl_set_origin(dentry, lowerpath->dentry, temp); - if (err) + if (S_ISDIR(stat->mode) || stat->nlink == 1) { + err = ovl_set_origin(dentry, lowerpath->dentry, temp); + if (err) + goto out_cleanup; + } + + upper = lookup_one_len(dentry->d_name.name, upperdir, + dentry->d_name.len); + if (IS_ERR(upper)) { + err = PTR_ERR(upper); + upper = NULL; goto out_cleanup; + } if (tmpfile) err = ovl_do_link(temp, udir, upper, true); @@ -416,17 +422,15 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, /* Restore timestamps on parent (best effort) */ ovl_set_timestamps(upperdir, pstat); -out2: +out: dput(temp); -out1: dput(upper); -out: return err; out_cleanup: if (!tmpfile) ovl_cleanup(wdir, temp); - goto out2; + goto out; } /* diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index f3136c31e72a..de0d4f742f36 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -135,7 +135,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry, * Make sure that the stored uuid matches the uuid of the lower * layer where file handle will be decoded. */ - if (uuid_be_cmp(fh->uuid, *(uuid_be *) &mnt->mnt_sb->s_uuid)) + if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid)) goto out; origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 0623cebeefff..10863b4105fa 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -56,7 +56,7 @@ struct ovl_fh { u8 len; /* size of this header + size of fid */ u8 flags; /* OVL_FH_FLAG_* */ u8 type; /* fid_type of fid */ - uuid_be uuid; /* uuid of filesystem */ + uuid_t uuid; /* uuid of filesystem */ u8 fid[0]; /* file identifier */ } __packed; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f0c8b33d99b1..520802da059c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -300,11 +300,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; - if (stack_guard_page_start(vma, start)) - start += PAGE_SIZE; end = vma->vm_end; - if (stack_guard_page_end(vma, end)) - end -= PAGE_SIZE; seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index ebf80c7739e1..48813aeaab80 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1512,6 +1512,22 @@ int dquot_initialize(struct inode *inode) } EXPORT_SYMBOL(dquot_initialize); +bool dquot_initialize_needed(struct inode *inode) +{ + struct dquot **dquots; + int i; + + if (!dquot_active(inode)) + return false; + + dquots = i_dquot(inode); + for (i = 0; i < MAXQUOTAS; i++) + if (!dquots[i] && sb_has_quota_active(inode->i_sb, i)) + return true; + return false; +} +EXPORT_SYMBOL(dquot_initialize_needed); + /* * Release all quotas referenced by inode. * diff --git a/fs/read_write.c b/fs/read_write.c index 47c1d4484df9..d591eeed061f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -678,16 +678,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, struct kiocb kiocb; ssize_t ret; - if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC)) - return -EOPNOTSUPP; - init_sync_kiocb(&kiocb, filp); - if (flags & RWF_HIPRI) - kiocb.ki_flags |= IOCB_HIPRI; - if (flags & RWF_DSYNC) - kiocb.ki_flags |= IOCB_DSYNC; - if (flags & RWF_SYNC) - kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC); + ret = kiocb_set_rw_flags(&kiocb, flags); + if (ret) + return ret; kiocb.ki_pos = *ppos; if (type == READ) @@ -1285,7 +1279,7 @@ static size_t compat_writev(struct file *file, if (!(file->f_mode & FMODE_CAN_WRITE)) goto out; - ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0); + ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags); out: if (ret > 0) diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 39bb1e838d8d..a11d773e5ff3 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2956,7 +2956,7 @@ void reiserfs_wait_on_write_block(struct super_block *s) static void queue_log_writer(struct super_block *s) { - wait_queue_t wait; + wait_queue_entry_t wait; struct reiserfs_journal *journal = SB_JOURNAL(s); set_bit(J_WRITERS_QUEUED, &journal->j_state); diff --git a/fs/select.c b/fs/select.c index d6c652a31e99..5b524a977d91 100644 --- a/fs/select.c +++ b/fs/select.c @@ -180,7 +180,7 @@ static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p) return table->entry++; } -static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct poll_wqueues *pwq = wait->private; DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task); @@ -206,7 +206,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) return default_wake_function(&dummy_wait, mode, sync, key); } -static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct poll_table_entry *entry; diff --git a/fs/signalfd.c b/fs/signalfd.c index 7e3d71109f51..593b022ac11b 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -43,7 +43,7 @@ void signalfd_cleanup(struct sighand_struct *sighand) if (likely(!waitqueue_active(wqh))) return; - /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */ + /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */ wake_up_poll(wqh, POLLHUP | POLLFREE); } diff --git a/fs/stat.c b/fs/stat.c index f494b182c7c7..c35610845ab1 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -672,6 +672,7 @@ void __inode_add_bytes(struct inode *inode, loff_t bytes) inode->i_bytes -= 512; } } +EXPORT_SYMBOL(__inode_add_bytes); void inode_add_bytes(struct inode *inode, loff_t bytes) { diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index a0376a2c1c29..f80be4c5df9d 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -82,7 +82,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ufs_error (sb, "ufs_free_fragments", "bit already cleared for fragment %u", i); } - + + inode_sub_bytes(inode, count << uspi->s_fshift); fs32_add(sb, &ucg->cg_cs.cs_nffree, count); uspi->cs_total.cs_nffree += count; fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); @@ -184,6 +185,7 @@ do_more: ufs_error(sb, "ufs_free_blocks", "freeing free fragment"); } ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); + inode_sub_bytes(inode, uspi->s_fpb << uspi->s_fshift); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) ufs_clusteracct (sb, ucpi, blkno, 1); @@ -398,10 +400,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, /* * There is not enough space for user on the device */ - if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) { - mutex_unlock(&UFS_SB(sb)->s_lock); - UFSD("EXIT (FAILED)\n"); - return 0; + if (unlikely(ufs_freefrags(uspi) <= uspi->s_root_blocks)) { + if (!capable(CAP_SYS_RESOURCE)) { + mutex_unlock(&UFS_SB(sb)->s_lock); + UFSD("EXIT (FAILED)\n"); + return 0; + } } if (goal >= uspi->s_size) @@ -419,12 +423,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, if (result) { ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); + *err = 0; write_seqlock(&UFS_I(inode)->meta_lock); ufs_cpu_to_data_ptr(sb, p, result); - write_sequnlock(&UFS_I(inode)->meta_lock); - *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); + write_sequnlock(&UFS_I(inode)->meta_lock); } mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT, result %llu\n", (unsigned long long)result); @@ -437,8 +441,10 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, result = ufs_add_fragments(inode, tmp, oldcount, newcount); if (result) { *err = 0; + read_seqlock_excl(&UFS_I(inode)->meta_lock); UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); + read_sequnlock_excl(&UFS_I(inode)->meta_lock); ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); mutex_unlock(&UFS_SB(sb)->s_lock); @@ -449,39 +455,29 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, /* * allocate new block and move data */ - switch (fs32_to_cpu(sb, usb1->fs_optim)) { - case UFS_OPTSPACE: + if (fs32_to_cpu(sb, usb1->fs_optim) == UFS_OPTSPACE) { request = newcount; - if (uspi->s_minfree < 5 || uspi->cs_total.cs_nffree - > uspi->s_dsize * uspi->s_minfree / (2 * 100)) - break; - usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME); - break; - default: - usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME); - - case UFS_OPTTIME: + if (uspi->cs_total.cs_nffree < uspi->s_space_to_time) + usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME); + } else { request = uspi->s_fpb; - if (uspi->cs_total.cs_nffree < uspi->s_dsize * - (uspi->s_minfree - 2) / 100) - break; - usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME); - break; + if (uspi->cs_total.cs_nffree > uspi->s_time_to_space) + usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE); } result = ufs_alloc_fragments (inode, cgno, goal, request, err); if (result) { ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); + mutex_unlock(&UFS_SB(sb)->s_lock); ufs_change_blocknr(inode, fragment - oldcount, oldcount, uspi->s_sbbase + tmp, uspi->s_sbbase + result, locked_page); + *err = 0; write_seqlock(&UFS_I(inode)->meta_lock); ufs_cpu_to_data_ptr(sb, p, result); - write_sequnlock(&UFS_I(inode)->meta_lock); - *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); - mutex_unlock(&UFS_SB(sb)->s_lock); + write_sequnlock(&UFS_I(inode)->meta_lock); if (newcount < request) ufs_free_fragments (inode, result + newcount, request - newcount); ufs_free_fragments (inode, tmp, oldcount); @@ -494,6 +490,20 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, return 0; } +static bool try_add_frags(struct inode *inode, unsigned frags) +{ + unsigned size = frags * i_blocksize(inode); + spin_lock(&inode->i_lock); + __inode_add_bytes(inode, size); + if (unlikely((u32)inode->i_blocks != inode->i_blocks)) { + __inode_sub_bytes(inode, size); + spin_unlock(&inode->i_lock); + return false; + } + spin_unlock(&inode->i_lock); + return true; +} + static u64 ufs_add_fragments(struct inode *inode, u64 fragment, unsigned oldcount, unsigned newcount) { @@ -530,6 +540,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, for (i = oldcount; i < newcount; i++) if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i)) return 0; + + if (!try_add_frags(inode, count)) + return 0; /* * Block can be extended */ @@ -647,6 +660,7 @@ cg_found: ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i); i = uspi->s_fpb - count; + inode_sub_bytes(inode, i << uspi->s_fshift); fs32_add(sb, &ucg->cg_cs.cs_nffree, i); uspi->cs_total.cs_nffree += i; fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i); @@ -657,6 +671,8 @@ cg_found: result = ufs_bitmap_search (sb, ucpi, goal, allocsize); if (result == INVBLOCK) return 0; + if (!try_add_frags(inode, count)) + return 0; for (i = 0; i < count; i++) ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i); @@ -716,6 +732,8 @@ norot: return INVBLOCK; ucpi->c_rotor = result; gotit: + if (!try_add_frags(inode, uspi->s_fpb)) + return 0; blkno = ufs_fragstoblks(result); ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7e41aee7b69a..f36d6a53687d 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -235,7 +235,8 @@ ufs_extend_tail(struct inode *inode, u64 writes_to, p = ufs_get_direct_data_ptr(uspi, ufsi, block); tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p), - new_size, err, locked_page); + new_size - (lastfrag & uspi->s_fpbmask), err, + locked_page); return tmp != 0; } @@ -284,7 +285,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index, goal += uspi->s_fpb; } tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), - goal, uspi->s_fpb, err, locked_page); + goal, nfrags, err, locked_page); if (!tmp) { *err = -ENOSPC; @@ -400,11 +401,20 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff u64 phys64 = 0; unsigned frag = fragment & uspi->s_fpbmask; - if (!create) { - phys64 = ufs_frag_map(inode, offsets, depth); - goto out; - } + phys64 = ufs_frag_map(inode, offsets, depth); + if (!create) + goto done; + if (phys64) { + if (fragment >= UFS_NDIR_FRAGMENT) + goto done; + read_seqlock_excl(&UFS_I(inode)->meta_lock); + if (fragment < UFS_I(inode)->i_lastfrag) { + read_sequnlock_excl(&UFS_I(inode)->meta_lock); + goto done; + } + read_sequnlock_excl(&UFS_I(inode)->meta_lock); + } /* This code entered only while writing ....? */ mutex_lock(&UFS_I(inode)->truncate_mutex); @@ -448,6 +458,11 @@ out: } mutex_unlock(&UFS_I(inode)->truncate_mutex); return err; + +done: + if (phys64) + map_bh(bh_result, sb, phys64 + frag); + return 0; } static int ufs_writepage(struct page *page, struct writeback_control *wbc) @@ -551,10 +566,8 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) */ inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode); set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink)); - if (inode->i_nlink == 0) { - ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); - return -1; - } + if (inode->i_nlink == 0) + return -ESTALE; /* * Linux now has 32-bit uid and gid, so we can support EFT. @@ -563,9 +576,9 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) i_gid_write(inode, ufs_get_inode_gid(sb, ufs_inode)); inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size); - inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec); - inode->i_ctime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec); - inode->i_mtime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec); + inode->i_atime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec); + inode->i_ctime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec); + inode->i_mtime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec); inode->i_mtime.tv_nsec = 0; inode->i_atime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; @@ -599,10 +612,8 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) */ inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode); set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink)); - if (inode->i_nlink == 0) { - ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); - return -1; - } + if (inode->i_nlink == 0) + return -ESTALE; /* * Linux now has 32-bit uid and gid, so we can support EFT. @@ -642,7 +653,7 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino) struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct buffer_head * bh; struct inode *inode; - int err; + int err = -EIO; UFSD("ENTER, ino %lu\n", ino); @@ -677,9 +688,10 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino) err = ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino)); } - + brelse(bh); if (err) goto bad_inode; + inode->i_version++; ufsi->i_lastfrag = (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift; @@ -688,15 +700,13 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino) ufs_set_inode_ops(inode); - brelse(bh); - UFSD("EXIT\n"); unlock_new_inode(inode); return inode; bad_inode: iget_failed(inode); - return ERR_PTR(-EIO); + return ERR_PTR(err); } static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode) @@ -841,8 +851,11 @@ void ufs_evict_inode(struct inode * inode) truncate_inode_pages_final(&inode->i_data); if (want_delete) { inode->i_size = 0; - if (inode->i_blocks) + if (inode->i_blocks && + (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) ufs_truncate_blocks(inode); + ufs_update_inode(inode, inode_needs_sync(inode)); } invalidate_inode_buffers(inode); @@ -868,7 +881,6 @@ static inline void free_data(struct to_free *ctx, u64 from, unsigned count) ctx->to = from + count; } -#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift) #define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift) static void ufs_trunc_direct(struct inode *inode) @@ -1100,25 +1112,30 @@ out: return err; } -static void __ufs_truncate_blocks(struct inode *inode) +static void ufs_truncate_blocks(struct inode *inode) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; unsigned offsets[4]; - int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets); + int depth; int depth2; unsigned i; struct ufs_buffer_head *ubh[3]; void *p; u64 block; - if (!depth) - return; + if (inode->i_size) { + sector_t last = (inode->i_size - 1) >> uspi->s_bshift; + depth = ufs_block_to_path(inode, last, offsets); + if (!depth) + return; + } else { + depth = 1; + } - /* find the last non-zero in offsets[] */ for (depth2 = depth - 1; depth2; depth2--) - if (offsets[depth2]) + if (offsets[depth2] != uspi->s_apb - 1) break; mutex_lock(&ufsi->truncate_mutex); @@ -1127,9 +1144,8 @@ static void __ufs_truncate_blocks(struct inode *inode) offsets[0] = UFS_IND_BLOCK; } else { /* get the blocks that should be partially emptied */ - p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]); + p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]++); for (i = 0; i < depth2; i++) { - offsets[i]++; /* next branch is fully freed */ block = ufs_data_ptr_to_cpu(sb, p); if (!block) break; @@ -1140,7 +1156,7 @@ static void __ufs_truncate_blocks(struct inode *inode) write_sequnlock(&ufsi->meta_lock); break; } - p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]); + p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]++); } while (i--) free_branch_tail(inode, offsets[i + 1], ubh[i], depth - i - 1); @@ -1155,7 +1171,9 @@ static void __ufs_truncate_blocks(struct inode *inode) free_full_branch(inode, block, i - UFS_IND_BLOCK + 1); } } + read_seqlock_excl(&ufsi->meta_lock); ufsi->i_lastfrag = DIRECT_FRAGMENT; + read_sequnlock_excl(&ufsi->meta_lock); mark_inode_dirty(inode); mutex_unlock(&ufsi->truncate_mutex); } @@ -1183,7 +1201,7 @@ static int ufs_truncate(struct inode *inode, loff_t size) truncate_setsize(inode, size); - __ufs_truncate_blocks(inode); + ufs_truncate_blocks(inode); inode->i_mtime = inode->i_ctime = current_time(inode); mark_inode_dirty(inode); out: @@ -1191,16 +1209,6 @@ out: return err; } -static void ufs_truncate_blocks(struct inode *inode) -{ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - __ufs_truncate_blocks(inode); -} - int ufs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 29ecaf739449..0a4f58a5073c 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -480,7 +480,7 @@ static void ufs_setup_cstotal(struct super_block *sb) usb3 = ubh_get_usb_third(uspi); if ((mtype == UFS_MOUNT_UFSTYPE_44BSD && - (usb1->fs_flags & UFS_FLAGS_UPDATED)) || + (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) || mtype == UFS_MOUNT_UFSTYPE_UFS2) { /*we have statistic in different place, then usual*/ uspi->cs_total.cs_ndir = fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir); @@ -596,9 +596,7 @@ static void ufs_put_cstotal(struct super_block *sb) usb2 = ubh_get_usb_second(uspi); usb3 = ubh_get_usb_third(uspi); - if ((mtype == UFS_MOUNT_UFSTYPE_44BSD && - (usb1->fs_flags & UFS_FLAGS_UPDATED)) || - mtype == UFS_MOUNT_UFSTYPE_UFS2) { + if (mtype == UFS_MOUNT_UFSTYPE_UFS2) { /*we have statistic in different place, then usual*/ usb2->fs_un.fs_u2.cs_ndir = cpu_to_fs64(sb, uspi->cs_total.cs_ndir); @@ -608,16 +606,26 @@ static void ufs_put_cstotal(struct super_block *sb) cpu_to_fs64(sb, uspi->cs_total.cs_nifree); usb3->fs_un1.fs_u2.cs_nffree = cpu_to_fs64(sb, uspi->cs_total.cs_nffree); - } else { - usb1->fs_cstotal.cs_ndir = - cpu_to_fs32(sb, uspi->cs_total.cs_ndir); - usb1->fs_cstotal.cs_nbfree = - cpu_to_fs32(sb, uspi->cs_total.cs_nbfree); - usb1->fs_cstotal.cs_nifree = - cpu_to_fs32(sb, uspi->cs_total.cs_nifree); - usb1->fs_cstotal.cs_nffree = - cpu_to_fs32(sb, uspi->cs_total.cs_nffree); + goto out; + } + + if (mtype == UFS_MOUNT_UFSTYPE_44BSD && + (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) { + /* store stats in both old and new places */ + usb2->fs_un.fs_u2.cs_ndir = + cpu_to_fs64(sb, uspi->cs_total.cs_ndir); + usb2->fs_un.fs_u2.cs_nbfree = + cpu_to_fs64(sb, uspi->cs_total.cs_nbfree); + usb3->fs_un1.fs_u2.cs_nifree = + cpu_to_fs64(sb, uspi->cs_total.cs_nifree); + usb3->fs_un1.fs_u2.cs_nffree = + cpu_to_fs64(sb, uspi->cs_total.cs_nffree); } + usb1->fs_cstotal.cs_ndir = cpu_to_fs32(sb, uspi->cs_total.cs_ndir); + usb1->fs_cstotal.cs_nbfree = cpu_to_fs32(sb, uspi->cs_total.cs_nbfree); + usb1->fs_cstotal.cs_nifree = cpu_to_fs32(sb, uspi->cs_total.cs_nifree); + usb1->fs_cstotal.cs_nffree = cpu_to_fs32(sb, uspi->cs_total.cs_nffree); +out: ubh_mark_buffer_dirty(USPI_UBH(uspi)); ufs_print_super_stuff(sb, usb1, usb2, usb3); UFSD("EXIT\n"); @@ -746,6 +754,23 @@ static void ufs_put_super(struct super_block *sb) return; } +static u64 ufs_max_bytes(struct super_block *sb) +{ + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + int bits = uspi->s_apbshift; + u64 res; + + if (bits > 21) + res = ~0ULL; + else + res = UFS_NDADDR + (1LL << bits) + (1LL << (2*bits)) + + (1LL << (3*bits)); + + if (res >= (MAX_LFS_FILESIZE >> uspi->s_bshift)) + return MAX_LFS_FILESIZE; + return res << uspi->s_bshift; +} + static int ufs_fill_super(struct super_block *sb, void *data, int silent) { struct ufs_sb_info * sbi; @@ -979,6 +1004,13 @@ again: flags |= UFS_ST_SUN; } + if ((flags & UFS_ST_MASK) == UFS_ST_44BSD && + uspi->s_postblformat == UFS_42POSTBLFMT) { + if (!silent) + pr_err("this is not a 44bsd filesystem"); + goto failed; + } + /* * Check ufs magic number */ @@ -1126,8 +1158,8 @@ magic_found: uspi->s_cgmask = fs32_to_cpu(sb, usb1->fs_cgmask); if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { - uspi->s_u2_size = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size); - uspi->s_u2_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize); + uspi->s_size = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size); + uspi->s_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize); } else { uspi->s_size = fs32_to_cpu(sb, usb1->fs_size); uspi->s_dsize = fs32_to_cpu(sb, usb1->fs_dsize); @@ -1176,6 +1208,18 @@ magic_found: uspi->s_postbloff = fs32_to_cpu(sb, usb3->fs_postbloff); uspi->s_rotbloff = fs32_to_cpu(sb, usb3->fs_rotbloff); + uspi->s_root_blocks = mul_u64_u32_div(uspi->s_dsize, + uspi->s_minfree, 100); + if (uspi->s_minfree <= 5) { + uspi->s_time_to_space = ~0ULL; + uspi->s_space_to_time = 0; + usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE); + } else { + uspi->s_time_to_space = (uspi->s_root_blocks / 2) + 1; + uspi->s_space_to_time = mul_u64_u32_div(uspi->s_dsize, + uspi->s_minfree - 2, 100) - 1; + } + /* * Compute another frequently used values */ @@ -1211,6 +1255,7 @@ magic_found: "fast symlink size (%u)\n", uspi->s_maxsymlinklen); uspi->s_maxsymlinklen = maxsymlen; } + sb->s_maxbytes = ufs_max_bytes(sb); sb->s_max_links = UFS_LINK_MAX; inode = ufs_iget(sb, UFS_ROOTINO); @@ -1364,19 +1409,17 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) mutex_lock(&UFS_SB(sb)->s_lock); usb3 = ubh_get_usb_third(uspi); - if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { + if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) buf->f_type = UFS2_MAGIC; - buf->f_blocks = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize); - } else { + else buf->f_type = UFS_MAGIC; - buf->f_blocks = uspi->s_dsize; - } - buf->f_bfree = ufs_blkstofrags(uspi->cs_total.cs_nbfree) + - uspi->cs_total.cs_nffree; + + buf->f_blocks = uspi->s_dsize; + buf->f_bfree = ufs_freefrags(uspi); buf->f_ffree = uspi->cs_total.cs_nifree; buf->f_bsize = sb->s_blocksize; - buf->f_bavail = (buf->f_bfree > (((long)buf->f_blocks / 100) * uspi->s_minfree)) - ? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0; + buf->f_bavail = (buf->f_bfree > uspi->s_root_blocks) + ? (buf->f_bfree - uspi->s_root_blocks) : 0; buf->f_files = uspi->s_ncg * uspi->s_ipg; buf->f_namelen = UFS_MAXNAMLEN; buf->f_fsid.val[0] = (u32)id; diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h index 0cbd5d340b67..150eef6f1233 100644 --- a/fs/ufs/ufs_fs.h +++ b/fs/ufs/ufs_fs.h @@ -733,10 +733,8 @@ struct ufs_sb_private_info { __u32 s_dblkno; /* offset of first data after cg */ __u32 s_cgoffset; /* cylinder group offset in cylinder */ __u32 s_cgmask; /* used to calc mod fs_ntrak */ - __u32 s_size; /* number of blocks (fragments) in fs */ - __u32 s_dsize; /* number of data blocks in fs */ - __u64 s_u2_size; /* ufs2: number of blocks (fragments) in fs */ - __u64 s_u2_dsize; /*ufs2: number of data blocks in fs */ + __u64 s_size; /* number of blocks (fragments) in fs */ + __u64 s_dsize; /* number of data blocks in fs */ __u32 s_ncg; /* number of cylinder groups */ __u32 s_bsize; /* size of basic blocks */ __u32 s_fsize; /* size of fragments */ @@ -793,6 +791,9 @@ struct ufs_sb_private_info { __u32 s_maxsymlinklen;/* upper limit on fast symlinks' size */ __s32 fs_magic; /* filesystem magic */ unsigned int s_dirblksize; + __u64 s_root_blocks; + __u64 s_time_to_space; + __u64 s_space_to_time; }; /* diff --git a/fs/ufs/util.c b/fs/ufs/util.c index f41ad0a6106f..02497a492eb2 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -243,9 +243,8 @@ ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev struct page *ufs_get_locked_page(struct address_space *mapping, pgoff_t index) { - struct page *page; - - page = find_lock_page(mapping, index); + struct inode *inode = mapping->host; + struct page *page = find_lock_page(mapping, index); if (!page) { page = read_mapping_page(mapping, index, NULL); @@ -253,7 +252,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping, printk(KERN_ERR "ufs_change_blocknr: " "read_mapping_page error: ino %lu, index: %lu\n", mapping->host->i_ino, index); - goto out; + return page; } lock_page(page); @@ -262,8 +261,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping, /* Truncate got there first */ unlock_page(page); put_page(page); - page = NULL; - goto out; + return NULL; } if (!PageUptodate(page) || PageError(page)) { @@ -272,11 +270,12 @@ struct page *ufs_get_locked_page(struct address_space *mapping, printk(KERN_ERR "ufs_change_blocknr: " "can not read page: ino %lu, index: %lu\n", - mapping->host->i_ino, index); + inode->i_ino, index); - page = ERR_PTR(-EIO); + return ERR_PTR(-EIO); } } -out: + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, 0); return page; } diff --git a/fs/ufs/util.h b/fs/ufs/util.h index b7fbf53dbc81..9fc7119a1551 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -350,16 +350,11 @@ static inline void *ubh_get_data_ptr(struct ufs_sb_private_info *uspi, #define ubh_blkmap(ubh,begin,bit) \ ((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb))) -/* - * Determine the number of available frags given a - * percentage to hold in reserve. - */ static inline u64 -ufs_freespace(struct ufs_sb_private_info *uspi, int percentreserved) +ufs_freefrags(struct ufs_sb_private_info *uspi) { return ufs_blkstofrags(uspi->cs_total.cs_nbfree) + - uspi->cs_total.cs_nffree - - (uspi->s_dsize * (percentreserved) / 100); + uspi->cs_total.cs_nffree; } /* @@ -473,15 +468,19 @@ static inline unsigned _ubh_find_last_zero_bit_( static inline int _ubh_isblockset_(struct ufs_sb_private_info * uspi, struct ufs_buffer_head * ubh, unsigned begin, unsigned block) { + u8 mask; switch (uspi->s_fpb) { case 8: return (*ubh_get_addr (ubh, begin + block) == 0xff); case 4: - return (*ubh_get_addr (ubh, begin + (block >> 1)) == (0x0f << ((block & 0x01) << 2))); + mask = 0x0f << ((block & 0x01) << 2); + return (*ubh_get_addr (ubh, begin + (block >> 1)) & mask) == mask; case 2: - return (*ubh_get_addr (ubh, begin + (block >> 2)) == (0x03 << ((block & 0x03) << 1))); + mask = 0x03 << ((block & 0x03) << 1); + return (*ubh_get_addr (ubh, begin + (block >> 2)) & mask) == mask; case 1: - return (*ubh_get_addr (ubh, begin + (block >> 3)) == (0x01 << (block & 0x07))); + mask = 0x01 << (block & 0x07); + return (*ubh_get_addr (ubh, begin + (block >> 3)) & mask) == mask; } return 0; } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f7555fc25877..6148ccd6cccf 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -81,7 +81,7 @@ struct userfaultfd_unmap_ctx { struct userfaultfd_wait_queue { struct uffd_msg msg; - wait_queue_t wq; + wait_queue_entry_t wq; struct userfaultfd_ctx *ctx; bool waken; }; @@ -91,7 +91,7 @@ struct userfaultfd_wake_range { unsigned long len; }; -static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode, +static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, int wake_flags, void *key) { struct userfaultfd_wake_range *range = key; @@ -129,7 +129,7 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode, * wouldn't be enough, the smp_mb__before_spinlock is * enough to avoid an explicit smp_mb() here. */ - list_del_init(&wq->task_list); + list_del_init(&wq->entry); out: return ret; } @@ -340,9 +340,28 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) bool must_wait, return_to_userland; long blocking_state; - BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); - ret = VM_FAULT_SIGBUS; + + /* + * We don't do userfault handling for the final child pid update. + * + * We also don't do userfault handling during + * coredumping. hugetlbfs has the special + * follow_hugetlb_page() to skip missing pages in the + * FOLL_DUMP case, anon memory also checks for FOLL_DUMP with + * the no_page_table() helper in follow_page_mask(), but the + * shmem_vm_ops->fault method is invoked even during + * coredumping without mmap_sem and it ends up here. + */ + if (current->flags & (PF_EXITING|PF_DUMPCORE)) + goto out; + + /* + * Coredumping runs without mmap_sem so we can only check that + * the mmap_sem is held, if PF_DUMPCORE was not set. + */ + WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem)); + ctx = vmf->vma->vm_userfaultfd_ctx.ctx; if (!ctx) goto out; @@ -361,12 +380,6 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) goto out; /* - * We don't do userfault handling for the final child pid update. - */ - if (current->flags & PF_EXITING) - goto out; - - /* * Check that we can return VM_FAULT_RETRY. * * NOTE: it should become possible to return VM_FAULT_RETRY @@ -509,13 +522,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * and it's fine not to block on the spinlock. The uwq on this * kernel stack can be released after the list_del_init. */ - if (!list_empty_careful(&uwq.wq.task_list)) { + if (!list_empty_careful(&uwq.wq.entry)) { spin_lock(&ctx->fault_pending_wqh.lock); /* * No need of list_del_init(), the uwq on the stack * will be freed shortly anyway. */ - list_del(&uwq.wq.task_list); + list_del(&uwq.wq.entry); spin_unlock(&ctx->fault_pending_wqh.lock); } @@ -847,7 +860,7 @@ wakeup: static inline struct userfaultfd_wait_queue *find_userfault_in( wait_queue_head_t *wqh) { - wait_queue_t *wq; + wait_queue_entry_t *wq; struct userfaultfd_wait_queue *uwq; VM_BUG_ON(!spin_is_locked(&wqh->lock)); @@ -856,7 +869,7 @@ static inline struct userfaultfd_wait_queue *find_userfault_in( if (!waitqueue_active(wqh)) goto out; /* walk in reverse to provide FIFO behavior to read userfaults */ - wq = list_last_entry(&wqh->task_list, typeof(*wq), task_list); + wq = list_last_entry(&wqh->head, typeof(*wq), entry); uwq = container_of(wq, struct userfaultfd_wait_queue, wq); out: return uwq; @@ -990,14 +1003,14 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, * changes __remove_wait_queue() to use * list_del_init() in turn breaking the * !list_empty_careful() check in - * handle_userfault(). The uwq->wq.task_list + * handle_userfault(). The uwq->wq.head list * must never be empty at any time during the * refile, or the waitqueue could disappear * from under us. The "wait_queue_head_t" * parameter of __remove_wait_queue() is unused * anyway. */ - list_del(&uwq->wq.task_list); + list_del(&uwq->wq.entry); __add_wait_queue(&ctx->fault_wqh, &uwq->wq); write_seqcount_end(&ctx->refile_seq); @@ -1019,7 +1032,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, fork_nctx = (struct userfaultfd_ctx *) (unsigned long) uwq->msg.arg.reserved.reserved1; - list_move(&uwq->wq.task_list, &fork_event); + list_move(&uwq->wq.entry, &fork_event); spin_unlock(&ctx->event_wqh.lock); ret = 0; break; @@ -1056,8 +1069,8 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (!list_empty(&fork_event)) { uwq = list_first_entry(&fork_event, typeof(*uwq), - wq.task_list); - list_del(&uwq->wq.task_list); + wq.entry); + list_del(&uwq->wq.entry); __add_wait_queue(&ctx->event_wqh, &uwq->wq); userfaultfd_event_complete(ctx, uwq); } @@ -1734,17 +1747,17 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd, static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) { struct userfaultfd_ctx *ctx = f->private_data; - wait_queue_t *wq; + wait_queue_entry_t *wq; struct userfaultfd_wait_queue *uwq; unsigned long pending = 0, total = 0; spin_lock(&ctx->fault_pending_wqh.lock); - list_for_each_entry(wq, &ctx->fault_pending_wqh.task_list, task_list) { + list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) { uwq = container_of(wq, struct userfaultfd_wait_queue, wq); pending++; total++; } - list_for_each_entry(wq, &ctx->fault_wqh.task_list, task_list) { + list_for_each_entry(wq, &ctx->fault_wqh.head, entry) { uwq = container_of(wq, struct userfaultfd_wait_queue, wq); total++; } diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 5c90f82b8f6b..a6e955bfead8 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -98,8 +98,7 @@ xfs-y += xfs_aops.o \ xfs_sysfs.o \ xfs_trans.o \ xfs_xattr.o \ - kmem.o \ - uuid.o + kmem.o # low-level transaction/log code xfs-y += xfs_log.o \ diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c deleted file mode 100644 index b83f76b6d410..000000000000 --- a/fs/xfs/uuid.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include <xfs.h> - -/* IRIX interpretation of an uuid_t */ -typedef struct { - __be32 uu_timelow; - __be16 uu_timemid; - __be16 uu_timehi; - __be16 uu_clockseq; - __be16 uu_node[3]; -} xfs_uu_t; - -/* - * uuid_getnodeuniq - obtain the node unique fields of a UUID. - * - * This is not in any way a standard or condoned UUID function; - * it just something that's needed for user-level file handles. - */ -void -uuid_getnodeuniq(uuid_t *uuid, int fsid [2]) -{ - xfs_uu_t *uup = (xfs_uu_t *)uuid; - - fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) | - be16_to_cpu(uup->uu_timemid); - fsid[1] = be32_to_cpu(uup->uu_timelow); -} - -int -uuid_is_nil(uuid_t *uuid) -{ - int i; - char *cp = (char *)uuid; - - if (uuid == NULL) - return 0; - /* implied check of version number here... */ - for (i = 0; i < sizeof *uuid; i++) - if (*cp++) return 0; /* not nil */ - return 1; /* is nil */ -} - -int -uuid_equal(uuid_t *uuid1, uuid_t *uuid2) -{ - return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1; -} diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h deleted file mode 100644 index 104db0f3bed6..000000000000 --- a/fs/xfs/uuid.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_UUID_H__ -#define __XFS_SUPPORT_UUID_H__ - -typedef struct { - unsigned char __u_bits[16]; -} uuid_t; - -extern int uuid_is_nil(uuid_t *uuid); -extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); -extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); - -static inline void -uuid_copy(uuid_t *dst, uuid_t *src) -{ - memcpy(dst, src, sizeof(uuid_t)); -} - -#endif /* __XFS_SUPPORT_UUID_H__ */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 09af0f7cd55e..d20c29b9c95b 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -276,7 +276,7 @@ xfs_end_io( struct xfs_inode *ip = XFS_I(ioend->io_inode); xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; - int error = ioend->io_bio->bi_error; + int error; /* * Just clean up the in-memory strutures if the fs has been shut down. @@ -289,6 +289,7 @@ xfs_end_io( /* * Clean up any COW blocks on an I/O error. */ + error = blk_status_to_errno(ioend->io_bio->bi_status); if (unlikely(error)) { switch (ioend->io_type) { case XFS_IO_COW: @@ -332,7 +333,7 @@ xfs_end_bio( else if (ioend->io_append_trans) queue_work(mp->m_data_workqueue, &ioend->io_work); else - xfs_destroy_ioend(ioend, bio->bi_error); + xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status)); } STATIC int @@ -500,11 +501,12 @@ xfs_submit_ioend( * time. */ if (status) { - ioend->io_bio->bi_error = status; + ioend->io_bio->bi_status = errno_to_blk_status(status); bio_endio(ioend->io_bio); return status; } + ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; submit_bio(ioend->io_bio); return 0; } @@ -564,6 +566,7 @@ xfs_chain_bio( bio_chain(ioend->io_bio, new); bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); + ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; submit_bio(ioend->io_bio); ioend->io_bio = new; } @@ -1316,9 +1319,12 @@ xfs_vm_bmap( * The swap code (ab-)uses ->bmap to get a block mapping and then * bypasseѕ the file system for actual I/O. We really can't allow * that on reflinks inodes, so we have to skip out here. And yes, - * 0 is the magic code for a bmap error.. + * 0 is the magic code for a bmap error. + * + * Since we don't pass back blockdev info, we can't return bmap + * information for rt files either. */ - if (xfs_is_reflink_inode(ip)) + if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip)) return 0; filemap_write_and_wait(mapping); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 07b77b73b024..438505f395e7 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -117,7 +117,7 @@ static inline void __xfs_buf_ioacct_dec( struct xfs_buf *bp) { - ASSERT(spin_is_locked(&bp->b_lock)); + lockdep_assert_held(&bp->b_lock); if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; @@ -1227,8 +1227,11 @@ xfs_buf_bio_end_io( * don't overwrite existing errors - otherwise we can lose errors on * buffers that require multiple bios to complete. */ - if (bio->bi_error) - cmpxchg(&bp->b_io_error, 0, bio->bi_error); + if (bio->bi_status) { + int error = blk_status_to_errno(bio->bi_status); + + cmpxchg(&bp->b_io_error, 0, error); + } if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 5fb5a0958a14..17f27a2fb5e2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -237,7 +237,11 @@ xfs_file_dax_read( if (!count) return 0; /* skip atime */ - xfs_ilock(ip, XFS_IOLOCK_SHARED); + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + xfs_ilock(ip, XFS_IOLOCK_SHARED); + } ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -541,7 +545,11 @@ xfs_file_dio_aio_write( iolock = XFS_IOLOCK_SHARED; } - xfs_ilock(ip, iolock); + if (!xfs_ilock_nowait(ip, iolock)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + xfs_ilock(ip, iolock); + } ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) @@ -553,9 +561,15 @@ xfs_file_dio_aio_write( * otherwise demote the lock if we had to take the exclusive lock * for other reasons in xfs_file_aio_write_checks. */ - if (unaligned_io) - inode_dio_wait(inode); - else if (iolock == XFS_IOLOCK_EXCL) { + if (unaligned_io) { + /* If we are going to wait for other DIO to finish, bail */ + if (iocb->ki_flags & IOCB_NOWAIT) { + if (atomic_read(&inode->i_dio_count)) + return -EAGAIN; + } else { + inode_dio_wait(inode); + } + } else if (iolock == XFS_IOLOCK_EXCL) { xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); iolock = XFS_IOLOCK_SHARED; } @@ -585,7 +599,12 @@ xfs_file_dax_write( size_t count; loff_t pos; - xfs_ilock(ip, iolock); + if (!xfs_ilock_nowait(ip, iolock)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + xfs_ilock(ip, iolock); + } + ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; @@ -892,6 +911,7 @@ xfs_file_open( return -EFBIG; if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) return -EIO; + file->f_mode |= FMODE_AIO_NOWAIT; return 0; } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f61c84f8e31a..b9c12e1cc23a 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -66,7 +66,6 @@ xfs_inode_alloc( XFS_STATS_INC(mp, vn_active); ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); ASSERT(ip->i_ino == 0); @@ -190,7 +189,7 @@ xfs_perag_set_reclaim_tag( { struct xfs_mount *mp = pag->pag_mount; - ASSERT(spin_is_locked(&pag->pag_ici_lock)); + lockdep_assert_held(&pag->pag_ici_lock); if (pag->pag_ici_reclaimable++) return; @@ -212,7 +211,7 @@ xfs_perag_clear_reclaim_tag( { struct xfs_mount *mp = pag->pag_mount; - ASSERT(spin_is_locked(&pag->pag_ici_lock)); + lockdep_assert_held(&pag->pag_ici_lock); if (--pag->pag_ici_reclaimable) return; @@ -270,12 +269,12 @@ xfs_inew_wait( DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT); do { - prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); if (!xfs_iflags_test(ip, XFS_INEW)) break; schedule(); } while (true); - finish_wait(wq, &wait.wait); + finish_wait(wq, &wait.wq_entry); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ec9826c56500..c0a1e840a588 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -622,12 +622,12 @@ __xfs_iflock( DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); do { - prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); if (xfs_isiflocked(ip)) io_schedule(); } while (!xfs_iflock_nowait(ip)); - finish_wait(wq, &wait.wait); + finish_wait(wq, &wait.wq_entry); } STATIC uint @@ -2486,11 +2486,11 @@ __xfs_iunpin_wait( xfs_iunpin(ip); do { - prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); if (xfs_ipincount(ip)) io_schedule(); } while (xfs_ipincount(ip)); - finish_wait(wq, &wait.wait); + finish_wait(wq, &wait.wq_entry); } void diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 08cb7d1a4a3a..013cc78d7daf 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -834,9 +834,7 @@ xfs_inode_item_format_convert( in_f->ilf_dsize = in_f32->ilf_dsize; in_f->ilf_ino = in_f32->ilf_ino; /* copy biggest field of ilf_u */ - memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, - in_f32->ilf_u.ilfu_uuid.__u_bits, - sizeof(uuid_t)); + uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid); in_f->ilf_blkno = in_f32->ilf_blkno; in_f->ilf_len = in_f32->ilf_len; in_f->ilf_boffset = in_f32->ilf_boffset; @@ -851,9 +849,7 @@ xfs_inode_item_format_convert( in_f->ilf_dsize = in_f64->ilf_dsize; in_f->ilf_ino = in_f64->ilf_ino; /* copy biggest field of ilf_u */ - memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, - in_f64->ilf_u.ilfu_uuid.__u_bits, - sizeof(uuid_t)); + uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid); in_f->ilf_blkno = in_f64->ilf_blkno; in_f->ilf_len = in_f64->ilf_len; in_f->ilf_boffset = in_f64->ilf_boffset; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 94e5bdf7304c..05dc87e8c1f5 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -995,6 +995,11 @@ xfs_file_iomap_begin( lockmode = xfs_ilock_data_map_shared(ip); } + if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) { + error = -EAGAIN; + goto out_unlock; + } + ASSERT(offset <= mp->m_super->s_maxbytes); if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) length = mp->m_super->s_maxbytes - offset; @@ -1016,6 +1021,15 @@ xfs_file_iomap_begin( if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { if (flags & IOMAP_DIRECT) { + /* + * A reflinked inode will result in CoW alloc. + * FIXME: It could still overwrite on unshared extents + * and not need allocation. + */ + if (flags & IOMAP_NOWAIT) { + error = -EAGAIN; + goto out_unlock; + } /* may drop and re-acquire the ilock */ error = xfs_reflink_allocate_cow(ip, &imap, &shared, &lockmode); @@ -1033,6 +1047,14 @@ xfs_file_iomap_begin( if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { /* + * If nowait is set bail since we are going to make + * allocations. + */ + if (flags & IOMAP_NOWAIT) { + error = -EAGAIN; + goto out_unlock; + } + /* * We cap the maximum length we map here to MAX_WRITEBACK_PAGES * pages to keep the chunks of work done where somewhat symmetric * with the work writeback does. This is a completely arbitrary diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 044fb0e15390..2d167fe643ec 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -19,6 +19,7 @@ #define __XFS_LINUX__ #include <linux/types.h> +#include <linux/uuid.h> /* * Kernel specific type declarations for XFS @@ -42,7 +43,6 @@ typedef __u32 xfs_nlink_t; #include "kmem.h" #include "mrlock.h" -#include "uuid.h" #include <linux/semaphore.h> #include <linux/mm.h> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index cd0b077deb35..8cec1e5505a4 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -352,13 +352,13 @@ xlog_header_check_mount( { ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); - if (uuid_is_nil(&head->h_fs_uuid)) { + if (uuid_is_null(&head->h_fs_uuid)) { /* * IRIX doesn't write the h_fs_uuid or h_fmt fields. If - * h_fs_uuid is nil, we assume this log was last mounted + * h_fs_uuid is null, we assume this log was last mounted * by IRIX and continue. */ - xfs_warn(mp, "nil uuid in log - IRIX style log"); + xfs_warn(mp, "null uuid in log - IRIX style log"); } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { xfs_warn(mp, "log has mismatched uuid - can't recover"); xlog_header_check_dump(mp, head); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2eaf81859166..d249546da15e 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -74,20 +74,19 @@ xfs_uuid_mount( int hole, i; /* Publish UUID in struct super_block */ - BUILD_BUG_ON(sizeof(mp->m_super->s_uuid) != sizeof(uuid_t)); - memcpy(&mp->m_super->s_uuid, uuid, sizeof(uuid_t)); + uuid_copy(&mp->m_super->s_uuid, uuid); if (mp->m_flags & XFS_MOUNT_NOUUID) return 0; - if (uuid_is_nil(uuid)) { - xfs_warn(mp, "Filesystem has nil UUID - can't mount"); + if (uuid_is_null(uuid)) { + xfs_warn(mp, "Filesystem has null UUID - can't mount"); return -EINVAL; } mutex_lock(&xfs_uuid_table_mutex); for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) { - if (uuid_is_nil(&xfs_uuid_table[i])) { + if (uuid_is_null(&xfs_uuid_table[i])) { hole = i; continue; } @@ -124,7 +123,7 @@ xfs_uuid_unmount( mutex_lock(&xfs_uuid_table_mutex); for (i = 0; i < xfs_uuid_table_size; i++) { - if (uuid_is_nil(&xfs_uuid_table[i])) + if (uuid_is_null(&xfs_uuid_table[i])) continue; if (!uuid_equal(uuid, &xfs_uuid_table[i])) continue; @@ -793,7 +792,10 @@ xfs_mountfs( * Copies the low order bits of the timestamp and the randomly * set "sequence" number out of a UUID. */ - uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid); + mp->m_fixedfsid[0] = + (get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) | + get_unaligned_be16(&sbp->sb_uuid.b[4]); + mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]); mp->m_dmevmask = 0; /* not persistent; set after each mount */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 455a575f101d..97df4db13b2e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1766,7 +1766,8 @@ STATIC int __init xfs_init_zones(void) { xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE, - offsetof(struct xfs_ioend, io_inline_bio)); + offsetof(struct xfs_ioend, io_inline_bio), + BIOSET_NEED_BVECS); if (!xfs_ioend_bioset) goto out; |