summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-12-06 19:08:59 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2019-12-06 19:08:59 +0100
commit9feb1af97e7366b512ecb9e4dd61d3252074cda3 (patch)
treeb94821803bc3c5a69b3132e82ab6ddd1fcbcbcc0 /include
parentMerge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs (diff)
parentMerge branch 'io_uring-5.5' into for-linus (diff)
downloadlinux-9feb1af97e7366b512ecb9e4dd61d3252074cda3.tar.xz
linux-9feb1af97e7366b512ecb9e4dd61d3252074cda3.zip
Merge tag 'for-linus-20191205' of git://git.kernel.dk/linux-block
Pull more block and io_uring updates from Jens Axboe: "I wasn't expecting this to be so big, and if I was, I would have used separate branches for this. Going forward I'll be doing separate branches for the current tree, just like for the next kernel version tree. In any case, this contains: - Series from Christoph that fixes an inherent race condition with zoned devices and revalidation. - null_blk zone size fix (Damien) - Fix for a regression in this merge window that caused busy spins by sending empty disk uevents (Eric) - Fix for a regression in this merge window for bfq stats (Hou) - Fix for io_uring creds allocation failure handling (me) - io_uring -ERESTARTSYS send/recvmsg fix (me) - Series that fixes the need for applications to retain state across async request punts for io_uring. This one is a bit larger than I would have hoped, but I think it's important we get this fixed for 5.5. - connect(2) improvement for io_uring, handling EINPROGRESS instead of having applications needing to poll for it (me) - Have io_uring use a hash for poll requests instead of an rbtree. This turned out to work much better in practice, so I think we should make the switch now. For some workloads, even with a fair amount of cancellations, the insertion sort is just too expensive. (me) - Various little io_uring fixes (me, Jackie, Pavel, LimingWu) - Fix for brd unaligned IO, and a warning for the future (Ming) - Fix for a bio integrity data leak (Justin) - bvec_iter_advance() improvement (Pavel) - Xen blkback page unmap fix (SeongJae) The major items in here are all well tested, and on the liburing side we continue to add regression and feature test cases. We're up to 50 topic cases now, each with anywhere from 1 to more than 10 cases in each" * tag 'for-linus-20191205' of git://git.kernel.dk/linux-block: (33 commits) block: fix memleak of bio integrity data io_uring: fix a typo in a comment bfq-iosched: Ensure bio->bi_blkg is valid before using it io_uring: hook all linked requests via link_list io_uring: fix error handling in io_queue_link_head io_uring: use hash table for poll command lookups io-wq: clear node->next on list deletion io_uring: ensure deferred timeouts copy necessary data io_uring: allow IO_SQE_* flags on IORING_OP_TIMEOUT null_blk: remove unused variable warning on !CONFIG_BLK_DEV_ZONED brd: warn on un-aligned buffer brd: remove max_hw_sectors queue limit xen/blkback: Avoid unmapping unmapped grant pages io_uring: handle connect -EINPROGRESS like -EAGAIN block: set the zone size in blk_revalidate_disk_zones atomically block: don't handle bio based drivers in blk_revalidate_disk_zones block: allocate the zone bitmaps lazily block: replace seq_zones_bitmap with conv_zones_bitmap block: simplify blkdev_nr_zones block: remove the empty line at the end of blk-zoned.c ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/blkdev.h24
-rw-r--r--include/linux/bvec.h22
-rw-r--r--include/linux/socket.h20
-rw-r--r--include/uapi/linux/io_uring.h1
4 files changed, 34 insertions, 33 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6012e2592628..47eb22a3b7f9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -357,8 +357,7 @@ typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
#define BLK_ALL_ZONES ((unsigned int)-1)
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
-
-extern unsigned int blkdev_nr_zones(struct block_device *bdev);
+unsigned int blkdev_nr_zones(struct gendisk *disk);
extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
sector_t sectors, sector_t nr_sectors,
gfp_t gfp_mask);
@@ -371,12 +370,7 @@ extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
#else /* CONFIG_BLK_DEV_ZONED */
-static inline unsigned int blkdev_nr_zones(struct block_device *bdev)
-{
- return 0;
-}
-
-static inline int blk_revalidate_disk_zones(struct gendisk *disk)
+static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
{
return 0;
}
@@ -504,9 +498,9 @@ struct request_queue {
/*
* Zoned block device information for request dispatch control.
* nr_zones is the total number of zones of the device. This is always
- * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones
- * bits which indicates if a zone is conventional (bit clear) or
- * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones
+ * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
+ * bits which indicates if a zone is conventional (bit set) or
+ * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
* bits which indicates if a zone is write locked, that is, if a write
* request targeting the zone was dispatched. All three fields are
* initialized by the low level device driver (e.g. scsi/sd.c).
@@ -519,7 +513,7 @@ struct request_queue {
* blk_mq_unfreeze_queue().
*/
unsigned int nr_zones;
- unsigned long *seq_zones_bitmap;
+ unsigned long *conv_zones_bitmap;
unsigned long *seq_zones_wlock;
#endif /* CONFIG_BLK_DEV_ZONED */
@@ -724,9 +718,11 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
static inline bool blk_queue_zone_is_seq(struct request_queue *q,
sector_t sector)
{
- if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap)
+ if (!blk_queue_is_zoned(q))
return false;
- return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
+ if (!q->conv_zones_bitmap)
+ return true;
+ return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
}
#else /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index a032f01e928c..679a42253170 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -87,26 +87,24 @@ struct bvec_iter_all {
static inline bool bvec_iter_advance(const struct bio_vec *bv,
struct bvec_iter *iter, unsigned bytes)
{
+ unsigned int idx = iter->bi_idx;
+
if (WARN_ONCE(bytes > iter->bi_size,
"Attempted to advance past end of bvec iter\n")) {
iter->bi_size = 0;
return false;
}
- while (bytes) {
- const struct bio_vec *cur = bv + iter->bi_idx;
- unsigned len = min3(bytes, iter->bi_size,
- cur->bv_len - iter->bi_bvec_done);
-
- bytes -= len;
- iter->bi_size -= len;
- iter->bi_bvec_done += len;
+ iter->bi_size -= bytes;
+ bytes += iter->bi_bvec_done;
- if (iter->bi_bvec_done == cur->bv_len) {
- iter->bi_bvec_done = 0;
- iter->bi_idx++;
- }
+ while (bytes && bytes >= bv[idx].bv_len) {
+ bytes -= bv[idx].bv_len;
+ idx++;
}
+
+ iter->bi_idx = idx;
+ iter->bi_bvec_done = bytes;
return true;
}
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 4bde63021c09..2d2313403101 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -378,12 +378,19 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags,
bool forbid_cmsg_compat);
-extern long __sys_sendmsg_sock(struct socket *sock,
- struct user_msghdr __user *msg,
+extern long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
unsigned int flags);
-extern long __sys_recvmsg_sock(struct socket *sock,
- struct user_msghdr __user *msg,
+extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user *uaddr,
unsigned int flags);
+extern int sendmsg_copy_msghdr(struct msghdr *msg,
+ struct user_msghdr __user *umsg, unsigned flags,
+ struct iovec **iov);
+extern int recvmsg_copy_msghdr(struct msghdr *msg,
+ struct user_msghdr __user *umsg, unsigned flags,
+ struct sockaddr __user **uaddr,
+ struct iovec **iov);
/* helpers which do the actual work for syscalls */
extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
@@ -399,9 +406,8 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol);
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
-extern int __sys_connect_file(struct file *file,
- struct sockaddr __user *uservaddr, int addrlen,
- int file_flags);
+extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
+ int addrlen, int file_flags);
extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
int addrlen);
extern int __sys_listen(int fd, int backlog);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 4637ed1d9949..eabccb46edd1 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -157,6 +157,7 @@ struct io_uring_params {
*/
#define IORING_FEAT_SINGLE_MMAP (1U << 0)
#define IORING_FEAT_NODROP (1U << 1)
+#define IORING_FEAT_SUBMIT_STABLE (1U << 2)
/*
* io_uring_register(2) opcodes and arguments