From 771b53d033e8663abdf59704806aa856b236dcdb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 22 Oct 2019 10:25:58 -0600
Subject: io-wq: small threadpool implementation for io_uring

This adds support for io-wq, a smaller and specialized thread pool
implementation. This is meant to replace workqueues for io_uring. Among
the reasons for this addition are:

- We can assign memory context smarter and more persistently if we
  manage the life time of threads.

- We can drop various work-arounds we have in io_uring, like the
  async_list.

- We can implement hashed work insertion, to manage concurrency of
  buffered writes without needing a) an extra workqueue, or b)
  needlessly making the concurrency of said workqueue very low
  which hurts performance of multiple buffered file writers.

- We can implement cancel through signals, for cancelling
  interruptible work like read/write (or send/recv) to/from sockets.

- We need the above cancel for being able to assign and use file tables
  from a process.

- We can implement a more thorough cancel operation in general.

- We need it to move towards a syslet/threadlet model for even faster
  async execution. For that we need to take ownership of the used
  threads.

This list is just off the top of my head. Performance should be the
same, or better, at least that's what I've seen in my testing. io-wq
supports basic NUMA functionality, setting up a pool per node.

io-wq hooks up to the scheduler schedule in/out just like workqueue
and uses that to drive the need for more/less workers.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 fs/io-wq.h

(limited to 'fs/io-wq.h')

diff --git a/fs/io-wq.h b/fs/io-wq.h
new file mode 100644
index 000000000000..be8f22c8937b
--- /dev/null
+++ b/fs/io-wq.h
@@ -0,0 +1,55 @@
+#ifndef INTERNAL_IO_WQ_H
+#define INTERNAL_IO_WQ_H
+
+struct io_wq;
+
+enum {
+	IO_WQ_WORK_CANCEL	= 1,
+	IO_WQ_WORK_HAS_MM	= 2,
+	IO_WQ_WORK_HASHED	= 4,
+	IO_WQ_WORK_NEEDS_USER	= 8,
+
+	IO_WQ_HASH_SHIFT	= 24,	/* upper 8 bits are used for hash key */
+};
+
+enum io_wq_cancel {
+	IO_WQ_CANCEL_OK,	/* cancelled before started */
+	IO_WQ_CANCEL_RUNNING,	/* found, running, and attempted cancelled */
+	IO_WQ_CANCEL_NOTFOUND,	/* work not found */
+};
+
+struct io_wq_work {
+	struct list_head list;
+	void (*func)(struct io_wq_work **);
+	unsigned flags;
+};
+
+#define INIT_IO_WORK(work, _func)			\
+	do {						\
+		(work)->func = _func;			\
+		(work)->flags = 0;			\
+	} while (0)					\
+
+struct io_wq *io_wq_create(unsigned concurrency, struct mm_struct *mm);
+void io_wq_destroy(struct io_wq *wq);
+
+void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
+void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val);
+void io_wq_flush(struct io_wq *wq);
+
+void io_wq_cancel_all(struct io_wq *wq);
+enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);
+
+#if defined(CONFIG_IO_WQ)
+extern void io_wq_worker_sleeping(struct task_struct *);
+extern void io_wq_worker_running(struct task_struct *);
+#else
+static inline void io_wq_worker_sleeping(struct task_struct *tsk)
+{
+}
+static inline void io_wq_worker_running(struct task_struct *tsk)
+{
+}
+#endif
+
+#endif
-- 
cgit v1.2.3