summary | refs | log | tree | commit | diff | stats
path: root/include
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-01-15 21:24:45 +0100
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-01-15 21:24:45 +0100
commit: b3c9dd182ed3bdcdaf0e42625a35924b0497afdc (patch)
tree: ad48ad4d923fee147c736318d0fad35b3755f4f5 /include
parent: Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kern... (diff)
parent: Revert "block: recursive merge requests" (diff)
download: linux-b3c9dd182ed3bdcdaf0e42625a35924b0497afdc.tar.xz
          linux-b3c9dd182ed3bdcdaf0e42625a35924b0497afdc.zip
Merge branch 'for-3.3/core' of git://git.kernel.dk/linux-block
* 'for-3.3/core' of git://git.kernel.dk/linux-block: (37 commits)
  Revert "block: recursive merge requests"
  block: Stop using macro stubs for the bio data integrity calls
  blockdev: convert some macros to static inlines
  fs: remove unneeded plug in mpage_readpages()
  block: Add BLKROTATIONAL ioctl
  block: Introduce blk_set_stacking_limits function
  block: remove WARN_ON_ONCE() in exit_io_context()
  block: an exiting task should be allowed to create io_context
  block: ioc_cgroup_changed() needs to be exported
  block: recursive merge requests
  block, cfq: fix empty queue crash caused by request merge
  block, cfq: move icq creation and rq->elv.icq association to block core
  block, cfq: restructure io_cq creation path for io_context interface cleanup
  block, cfq: move io_cq exit/release to blk-ioc.c
  block, cfq: move icq cache management to block core
  block, cfq: move io_cq lookup to blk-ioc.c
  block, cfq: move cfqd->icq_list to request_queue and add request->elv.icq
  block, cfq: reorganize cfq_io_context into generic and cfq specific parts
  block: remove elevator_queue->ops
  block: reorder elevator switch sequence
  ...

Fix up conflicts in:
 - block/blk-cgroup.c
   Switch from can_attach_task to can_attach
 - block/cfq-iosched.c
   conflict with now removed cic index changes (we now use q->id instead)
Diffstat (limited to 'include')
-rw-r--r-- include/linux/bio.h 66
-rw-r--r-- include/linux/blkdev.h 101
-rw-r--r-- include/linux/elevator.h 41
-rw-r--r-- include/linux/fs.h 1
-rw-r--r-- include/linux/iocontext.h 136
5 files changed, 252 insertions, 93 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 847994aef0e9..129a9c097958 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -515,24 +515,64 @@ extern void bio_integrity_init(void);
#else /* CONFIG_BLK_DEV_INTEGRITY */
-#define bio_integrity(a) (0)
-#define bioset_integrity_create(a, b) (0)
-#define bio_integrity_prep(a) (0)
-#define bio_integrity_enabled(a) (0)
+static inline int bio_integrity(struct bio *bio)
+{
+ return 0;
+}
+
+static inline int bio_integrity_enabled(struct bio *bio)
+{
+ return 0;
+}
+
+static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
+{
+ return 0;
+}
+
+static inline void bioset_integrity_free (struct bio_set *bs)
+{
+ return;
+}
+
+static inline int bio_integrity_prep(struct bio *bio)
+{
+ return 0;
+}
+
+static inline void bio_integrity_free(struct bio *bio, struct bio_set *bs)
+{
+ return;
+}
+
static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
gfp_t gfp_mask, struct bio_set *bs)
{
return 0;
}
-#define bioset_integrity_free(a) do { } while (0)
-#define bio_integrity_free(a, b) do { } while (0)
-#define bio_integrity_endio(a, b) do { } while (0)
-#define bio_integrity_advance(a, b) do { } while (0)
-#define bio_integrity_trim(a, b, c) do { } while (0)
-#define bio_integrity_split(a, b, c) do { } while (0)
-#define bio_integrity_set_tag(a, b, c) do { } while (0)
-#define bio_integrity_get_tag(a, b, c) do { } while (0)
-#define bio_integrity_init(a) do { } while (0)
+
+static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp,
+ int sectors)
+{
+ return;
+}
+
+static inline void bio_integrity_advance(struct bio *bio,
+ unsigned int bytes_done)
+{
+ return;
+}
+
+static inline void bio_integrity_trim(struct bio *bio, unsigned int offset,
+ unsigned int sectors)
+{
+ return;
+}
+
+static inline void bio_integrity_init(void)
+{
+ return;
+}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0ed1eb062313..6c6a1f008065 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -111,10 +111,14 @@ struct request {
* Three pointers are available for the IO schedulers, if they need
* more they have to dynamically allocate it. Flush requests are
* never put on the IO scheduler. So let the flush fields share
- * space with the three elevator_private pointers.
+ * space with the elevator data.
*/
union {
- void *elevator_private[3];
+ struct {
+ struct io_cq *icq;
+ void *priv[2];
+ } elv;
+
struct {
unsigned int seq;
struct list_head list;
@@ -311,6 +315,12 @@ struct request_queue {
unsigned long queue_flags;
/*
+ * ida allocated id for this queue. Used to index queues from
+ * ioctx.
+ */
+ int id;
+
+ /*
* queue needs bounce pages for pages above this limit
*/
gfp_t bounce_gfp;
@@ -351,6 +361,8 @@ struct request_queue {
struct timer_list timeout;
struct list_head timeout_list;
+ struct list_head icq_list;
+
struct queue_limits limits;
/*
@@ -387,6 +399,9 @@ struct request_queue {
/* Throttle data */
struct throtl_data *td;
#endif
+#ifdef CONFIG_LOCKDEP
+ int ioc_release_depth;
+#endif
};
#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
@@ -481,6 +496,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
+#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q) \
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
@@ -660,7 +676,6 @@ extern void __blk_put_request(struct request_queue *, struct request *);
extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
extern struct request *blk_make_request(struct request_queue *, struct bio *,
gfp_t);
-extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
extern void blk_requeue_request(struct request_queue *, struct request *);
extern void blk_add_request_payload(struct request *rq, struct page *page,
unsigned int len);
@@ -829,6 +844,7 @@ extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
extern void blk_set_default_limits(struct queue_limits *lim);
+extern void blk_set_stacking_limits(struct queue_limits *lim);
extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
sector_t offset);
extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
@@ -859,7 +875,7 @@ extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatte
extern void blk_dump_rq_flags(struct request *, char *);
extern long nr_blockdev_pages(void);
-int blk_get_queue(struct request_queue *);
+bool __must_check blk_get_queue(struct request_queue *);
struct request_queue *blk_alloc_queue(gfp_t);
struct request_queue *blk_alloc_queue_node(gfp_t, int);
extern void blk_put_queue(struct request_queue *);
@@ -1282,19 +1298,70 @@ queue_max_integrity_segments(struct request_queue *q)
#else /* CONFIG_BLK_DEV_INTEGRITY */
-#define blk_integrity_rq(rq) (0)
-#define blk_rq_count_integrity_sg(a, b) (0)
-#define blk_rq_map_integrity_sg(a, b, c) (0)
-#define bdev_get_integrity(a) (0)
-#define blk_get_integrity(a) (0)
-#define blk_integrity_compare(a, b) (0)
-#define blk_integrity_register(a, b) (0)
-#define blk_integrity_unregister(a) do { } while (0)
-#define blk_queue_max_integrity_segments(a, b) do { } while (0)
-#define queue_max_integrity_segments(a) (0)
-#define blk_integrity_merge_rq(a, b, c) (0)
-#define blk_integrity_merge_bio(a, b, c) (0)
-#define blk_integrity_is_initialized(a) (0)
+struct bio;
+struct block_device;
+struct gendisk;
+struct blk_integrity;
+
+static inline int blk_integrity_rq(struct request *rq)
+{
+ return 0;
+}
+static inline int blk_rq_count_integrity_sg(struct request_queue *q,
+ struct bio *b)
+{
+ return 0;
+}
+static inline int blk_rq_map_integrity_sg(struct request_queue *q,
+ struct bio *b,
+ struct scatterlist *s)
+{
+ return 0;
+}
+static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
+{
+ return 0;
+}
+static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+{
+ return NULL;
+}
+static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
+{
+ return 0;
+}
+static inline int blk_integrity_register(struct gendisk *d,
+ struct blk_integrity *b)
+{
+ return 0;
+}
+static inline void blk_integrity_unregister(struct gendisk *d)
+{
+}
+static inline void blk_queue_max_integrity_segments(struct request_queue *q,
+ unsigned int segs)
+{
+}
+static inline unsigned short queue_max_integrity_segments(struct request_queue *q)
+{
+ return 0;
+}
+static inline int blk_integrity_merge_rq(struct request_queue *rq,
+ struct request *r1,
+ struct request *r2)
+{
+ return 0;
+}
+static inline int blk_integrity_merge_bio(struct request_queue *rq,
+ struct request *r,
+ struct bio *b)
+{
+ return 0;
+}
+static inline bool blk_integrity_is_initialized(struct gendisk *g)
+{
+ return 0;
+}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 1d0f7a2ff73b..c24f3d7fbf1e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -5,6 +5,8 @@
#ifdef CONFIG_BLOCK
+struct io_cq;
+
typedef int (elevator_merge_fn) (struct request_queue *, struct request **,
struct bio *);
@@ -24,6 +26,8 @@ typedef struct request *(elevator_request_list_fn) (struct request_queue *, stru
typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
typedef int (elevator_may_queue_fn) (struct request_queue *, int);
+typedef void (elevator_init_icq_fn) (struct io_cq *);
+typedef void (elevator_exit_icq_fn) (struct io_cq *);
typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
typedef void (elevator_put_req_fn) (struct request *);
typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
@@ -56,6 +60,9 @@ struct elevator_ops
elevator_request_list_fn *elevator_former_req_fn;
elevator_request_list_fn *elevator_latter_req_fn;
+ elevator_init_icq_fn *elevator_init_icq_fn; /* see iocontext.h */
+ elevator_exit_icq_fn *elevator_exit_icq_fn; /* ditto */
+
elevator_set_req_fn *elevator_set_req_fn;
elevator_put_req_fn *elevator_put_req_fn;
@@ -63,7 +70,6 @@ struct elevator_ops
elevator_init_fn *elevator_init_fn;
elevator_exit_fn *elevator_exit_fn;
- void (*trim)(struct io_context *);
};
#define ELV_NAME_MAX (16)
@@ -79,11 +85,20 @@ struct elv_fs_entry {
*/
struct elevator_type
{
- struct list_head list;
+ /* managed by elevator core */
+ struct kmem_cache *icq_cache;
+
+ /* fields provided by elevator implementation */
struct elevator_ops ops;
+ size_t icq_size; /* see iocontext.h */
+ size_t icq_align; /* ditto */
struct elv_fs_entry *elevator_attrs;
char elevator_name[ELV_NAME_MAX];
struct module *elevator_owner;
+
+ /* managed by elevator core */
+ char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */
+ struct list_head list;
};
/*
@@ -91,10 +106,9 @@ struct elevator_type
*/
struct elevator_queue
{
- struct elevator_ops *ops;
+ struct elevator_type *type;
void *elevator_data;
struct kobject kobj;
- struct elevator_type *elevator_type;
struct mutex sysfs_lock;
struct hlist_head *hash;
unsigned int registered:1;
@@ -129,7 +143,7 @@ extern void elv_drain_elevator(struct request_queue *);
/*
* io scheduler registration
*/
-extern void elv_register(struct elevator_type *);
+extern int elv_register(struct elevator_type *);
extern void elv_unregister(struct elevator_type *);
/*
@@ -197,22 +211,5 @@ enum {
INIT_LIST_HEAD(&(rq)->csd.list); \
} while (0)
-/*
- * io context count accounting
- */
-#define elv_ioc_count_mod(name, __val) this_cpu_add(name, __val)
-#define elv_ioc_count_inc(name) this_cpu_inc(name)
-#define elv_ioc_count_dec(name) this_cpu_dec(name)
-
-#define elv_ioc_count_read(name) \
-({ \
- unsigned long __val = 0; \
- int __cpu; \
- smp_wmb(); \
- for_each_possible_cpu(__cpu) \
- __val += per_cpu(name, __cpu); \
- __val; \
-})
-
#endif /* CONFIG_BLOCK */
#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4bc8169fb5a1..0244082d42c5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -319,6 +319,7 @@ struct inodes_stat_t {
#define BLKPBSZGET _IO(0x12,123)
#define BLKDISCARDZEROES _IO(0x12,124)
#define BLKSECDISCARD _IO(0x12,125)
+#define BLKROTATIONAL _IO(0x12,126)
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 5037a0ad2312..7e1371c4bccf 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -3,32 +3,92 @@
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
-struct cfq_queue;
-struct cfq_ttime {
- unsigned long last_end_request;
-
- unsigned long ttime_total;
- unsigned long ttime_samples;
- unsigned long ttime_mean;
+enum {
+ ICQ_IOPRIO_CHANGED,
+ ICQ_CGROUP_CHANGED,
};
-struct cfq_io_context {
- void *key;
-
- struct cfq_queue *cfqq[2];
-
- struct io_context *ioc;
-
- struct cfq_ttime ttime;
-
- struct list_head queue_list;
- struct hlist_node cic_list;
-
- void (*dtor)(struct io_context *); /* destructor */
- void (*exit)(struct io_context *); /* called on task exit */
+/*
+ * An io_cq (icq) is association between an io_context (ioc) and a
+ * request_queue (q). This is used by elevators which need to track
+ * information per ioc - q pair.
+ *
+ * Elevator can request use of icq by setting elevator_type->icq_size and
+ * ->icq_align. Both size and align must be larger than that of struct
+ * io_cq and elevator can use the tail area for private information. The
+ * recommended way to do this is defining a struct which contains io_cq as
+ * the first member followed by private members and using its size and
+ * align. For example,
+ *
+ * struct snail_io_cq {
+ * struct io_cq icq;
+ * int poke_snail;
+ * int feed_snail;
+ * };
+ *
+ * struct elevator_type snail_elv_type {
+ * .ops = { ... },
+ * .icq_size = sizeof(struct snail_io_cq),
+ * .icq_align = __alignof__(struct snail_io_cq),
+ * ...
+ * };
+ *
+ * If icq_size is set, block core will manage icq's. All requests will
+ * have its ->elv.icq field set before elevator_ops->elevator_set_req_fn()
+ * is called and be holding a reference to the associated io_context.
+ *
+ * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is
+ * called and, on destruction, ->elevator_exit_icq_fn(). Both functions
+ * are called with both the associated io_context and queue locks held.
+ *
+ * Elevator is allowed to lookup icq using ioc_lookup_icq() while holding
+ * queue lock but the returned icq is valid only until the queue lock is
+ * released. Elevators can not and should not try to create or destroy
+ * icq's.
+ *
+ * As icq's are linked from both ioc and q, the locking rules are a bit
+ * complex.
+ *
+ * - ioc lock nests inside q lock.
+ *
+ * - ioc->icq_list and icq->ioc_node are protected by ioc lock.
+ * q->icq_list and icq->q_node by q lock.
+ *
+ * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq
+ * itself is protected by q lock. However, both the indexes and icq
+ * itself are also RCU managed and lookup can be performed holding only
+ * the q lock.
+ *
+ * - icq's are not reference counted. They are destroyed when either the
+ * ioc or q goes away. Each request with icq set holds an extra
+ * reference to ioc to ensure it stays until the request is completed.
+ *
+ * - Linking and unlinking icq's are performed while holding both ioc and q
+ * locks. Due to the lock ordering, q exit is simple but ioc exit
+ * requires reverse-order double lock dance.
+ */
+struct io_cq {
+ struct request_queue *q;
+ struct io_context *ioc;
- struct rcu_head rcu_head;
+ /*
+ * q_node and ioc_node link io_cq through icq_list of q and ioc
+ * respectively. Both fields are unused once ioc_exit_icq() is
+ * called and shared with __rcu_icq_cache and __rcu_head which are
+ * used for RCU free of io_cq.
+ */
+ union {
+ struct list_head q_node;
+ struct kmem_cache *__rcu_icq_cache;
+ };
+ union {
+ struct hlist_node ioc_node;
+ struct rcu_head __rcu_head;
+ };
+
+ unsigned long changed;
};
/*
@@ -43,11 +103,6 @@ struct io_context {
spinlock_t lock;
unsigned short ioprio;
- unsigned short ioprio_changed;
-
-#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
- unsigned short cgroup_changed;
-#endif
/*
* For request batching
@@ -55,9 +110,11 @@ struct io_context {
int nr_batch_requests; /* Number of requests left in the batch */
unsigned long last_waited; /* Time last woken after wait for request */
- struct radix_tree_root radix_root;
- struct hlist_head cic_list;
- void __rcu *ioc_data;
+ struct radix_tree_root icq_tree;
+ struct io_cq __rcu *icq_hint;
+ struct hlist_head icq_list;
+
+ struct work_struct release_work;
};
static inline struct io_context *ioc_task_link(struct io_context *ioc)
@@ -76,20 +133,17 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
struct task_struct;
#ifdef CONFIG_BLOCK
-int put_io_context(struct io_context *ioc);
+void put_io_context(struct io_context *ioc, struct request_queue *locked_q);
void exit_io_context(struct task_struct *task);
-struct io_context *get_io_context(gfp_t gfp_flags, int node);
-struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
+struct io_context *get_task_io_context(struct task_struct *task,
+ gfp_t gfp_flags, int node);
+void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
+void ioc_cgroup_changed(struct io_context *ioc);
#else
-static inline void exit_io_context(struct task_struct *task)
-{
-}
-
struct io_context;
-static inline int put_io_context(struct io_context *ioc)
-{
- return 1;
-}
+static inline void put_io_context(struct io_context *ioc,
+ struct request_queue *locked_q) { }
+static inline void exit_io_context(struct task_struct *task) { }
#endif
#endif