diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-15 21:24:45 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-15 21:24:45 +0100 |
commit | b3c9dd182ed3bdcdaf0e42625a35924b0497afdc (patch) | |
tree | ad48ad4d923fee147c736318d0fad35b3755f4f5 /include | |
parent | Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kern... (diff) | |
parent | Revert "block: recursive merge requests" (diff) | |
download | linux-b3c9dd182ed3bdcdaf0e42625a35924b0497afdc.tar.xz linux-b3c9dd182ed3bdcdaf0e42625a35924b0497afdc.zip |
Merge branch 'for-3.3/core' of git://git.kernel.dk/linux-block
* 'for-3.3/core' of git://git.kernel.dk/linux-block: (37 commits)
Revert "block: recursive merge requests"
block: Stop using macro stubs for the bio data integrity calls
blockdev: convert some macros to static inlines
fs: remove unneeded plug in mpage_readpages()
block: Add BLKROTATIONAL ioctl
block: Introduce blk_set_stacking_limits function
block: remove WARN_ON_ONCE() in exit_io_context()
block: an exiting task should be allowed to create io_context
block: ioc_cgroup_changed() needs to be exported
block: recursive merge requests
block, cfq: fix empty queue crash caused by request merge
block, cfq: move icq creation and rq->elv.icq association to block core
block, cfq: restructure io_cq creation path for io_context interface cleanup
block, cfq: move io_cq exit/release to blk-ioc.c
block, cfq: move icq cache management to block core
block, cfq: move io_cq lookup to blk-ioc.c
block, cfq: move cfqd->icq_list to request_queue and add request->elv.icq
block, cfq: reorganize cfq_io_context into generic and cfq specific parts
block: remove elevator_queue->ops
block: reorder elevator switch sequence
...
Fix up conflicts in:
- block/blk-cgroup.c
Switch from can_attach_task to can_attach
- block/cfq-iosched.c
conflict with now removed cic index changes (we now use q->id instead)
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bio.h | 66 | ||||
-rw-r--r-- | include/linux/blkdev.h | 101 | ||||
-rw-r--r-- | include/linux/elevator.h | 41 | ||||
-rw-r--r-- | include/linux/fs.h | 1 | ||||
-rw-r--r-- | include/linux/iocontext.h | 136 |
5 files changed, 252 insertions, 93 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h index 847994aef0e9..129a9c097958 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -515,24 +515,64 @@ extern void bio_integrity_init(void); #else /* CONFIG_BLK_DEV_INTEGRITY */ -#define bio_integrity(a) (0) -#define bioset_integrity_create(a, b) (0) -#define bio_integrity_prep(a) (0) -#define bio_integrity_enabled(a) (0) +static inline int bio_integrity(struct bio *bio) +{ + return 0; +} + +static inline int bio_integrity_enabled(struct bio *bio) +{ + return 0; +} + +static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) +{ + return 0; +} + +static inline void bioset_integrity_free (struct bio_set *bs) +{ + return; +} + +static inline int bio_integrity_prep(struct bio *bio) +{ + return 0; +} + +static inline void bio_integrity_free(struct bio *bio, struct bio_set *bs) +{ + return; +} + static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask, struct bio_set *bs) { return 0; } -#define bioset_integrity_free(a) do { } while (0) -#define bio_integrity_free(a, b) do { } while (0) -#define bio_integrity_endio(a, b) do { } while (0) -#define bio_integrity_advance(a, b) do { } while (0) -#define bio_integrity_trim(a, b, c) do { } while (0) -#define bio_integrity_split(a, b, c) do { } while (0) -#define bio_integrity_set_tag(a, b, c) do { } while (0) -#define bio_integrity_get_tag(a, b, c) do { } while (0) -#define bio_integrity_init(a) do { } while (0) + +static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp, + int sectors) +{ + return; +} + +static inline void bio_integrity_advance(struct bio *bio, + unsigned int bytes_done) +{ + return; +} + +static inline void bio_integrity_trim(struct bio *bio, unsigned int offset, + unsigned int sectors) +{ + return; +} + +static inline void bio_integrity_init(void) +{ + return; +} #endif /* CONFIG_BLK_DEV_INTEGRITY */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0ed1eb062313..6c6a1f008065 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -111,10 +111,14 @@ struct request { * Three pointers are available for the IO schedulers, if they need * more they have to dynamically allocate it. Flush requests are * never put on the IO scheduler. So let the flush fields share - * space with the three elevator_private pointers. + * space with the elevator data. */ union { - void *elevator_private[3]; + struct { + struct io_cq *icq; + void *priv[2]; + } elv; + struct { unsigned int seq; struct list_head list; @@ -311,6 +315,12 @@ struct request_queue { unsigned long queue_flags; /* + * ida allocated id for this queue. Used to index queues from + * ioctx. + */ + int id; + + /* * queue needs bounce pages for pages above this limit */ gfp_t bounce_gfp; @@ -351,6 +361,8 @@ struct request_queue { struct timer_list timeout; struct list_head timeout_list; + struct list_head icq_list; + struct queue_limits limits; /* @@ -387,6 +399,9 @@ struct request_queue { /* Throttle data */ struct throtl_data *td; #endif +#ifdef CONFIG_LOCKDEP + int ioc_release_depth; +#endif }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ @@ -481,6 +496,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) +#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) @@ -660,7 +676,6 @@ extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); extern struct request *blk_make_request(struct request_queue *, struct bio *, gfp_t); -extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); extern void blk_add_request_payload(struct request *rq, struct page *page, unsigned int len); @@ -829,6 +844,7 @@ extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_default_limits(struct queue_limits *lim); +extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, @@ -859,7 +875,7 @@ extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatte extern void blk_dump_rq_flags(struct request *, char *); extern long nr_blockdev_pages(void); -int blk_get_queue(struct request_queue *); +bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(gfp_t); struct request_queue *blk_alloc_queue_node(gfp_t, int); extern void blk_put_queue(struct request_queue *); @@ -1282,19 +1298,70 @@ queue_max_integrity_segments(struct request_queue *q) #else /* CONFIG_BLK_DEV_INTEGRITY */ -#define blk_integrity_rq(rq) (0) -#define blk_rq_count_integrity_sg(a, b) (0) -#define blk_rq_map_integrity_sg(a, b, c) (0) -#define bdev_get_integrity(a) (0) -#define blk_get_integrity(a) (0) -#define blk_integrity_compare(a, b) (0) -#define blk_integrity_register(a, b) (0) -#define blk_integrity_unregister(a) do { } while (0) -#define blk_queue_max_integrity_segments(a, b) do { } while (0) -#define queue_max_integrity_segments(a) (0) -#define blk_integrity_merge_rq(a, b, c) (0) -#define blk_integrity_merge_bio(a, b, c) (0) -#define blk_integrity_is_initialized(a) (0) +struct bio; +struct block_device; +struct gendisk; +struct blk_integrity; + +static inline int blk_integrity_rq(struct request *rq) +{ + return 0; +} +static inline int blk_rq_count_integrity_sg(struct request_queue *q, + struct bio *b) +{ + return 0; +} +static inline int blk_rq_map_integrity_sg(struct request_queue *q, + struct bio *b, + struct scatterlist *s) +{ + return 0; +} +static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) +{ + return 0; +} +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +{ + return NULL; +} +static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) +{ + return 0; +} +static inline int blk_integrity_register(struct gendisk *d, + struct blk_integrity *b) +{ + return 0; +} +static inline void blk_integrity_unregister(struct gendisk *d) +{ +} +static inline void blk_queue_max_integrity_segments(struct request_queue *q, + unsigned int segs) +{ +} +static inline unsigned short queue_max_integrity_segments(struct request_queue *q) +{ + return 0; +} +static inline int blk_integrity_merge_rq(struct request_queue *rq, + struct request *r1, + struct request *r2) +{ + return 0; +} +static inline int blk_integrity_merge_bio(struct request_queue *rq, + struct request *r, + struct bio *b) +{ + return 0; +} +static inline bool blk_integrity_is_initialized(struct gendisk *g) +{ + return 0; +} #endif /* CONFIG_BLK_DEV_INTEGRITY */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 1d0f7a2ff73b..c24f3d7fbf1e 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -5,6 +5,8 @@ #ifdef CONFIG_BLOCK +struct io_cq; + typedef int (elevator_merge_fn) (struct request_queue *, struct request **, struct bio *); @@ -24,6 +26,8 @@ typedef struct request *(elevator_request_list_fn) (struct request_queue *, stru typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); typedef int (elevator_may_queue_fn) (struct request_queue *, int); +typedef void (elevator_init_icq_fn) (struct io_cq *); +typedef void (elevator_exit_icq_fn) (struct io_cq *); typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t); typedef void (elevator_put_req_fn) (struct request *); typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *); @@ -56,6 +60,9 @@ struct elevator_ops elevator_request_list_fn *elevator_former_req_fn; elevator_request_list_fn *elevator_latter_req_fn; + elevator_init_icq_fn *elevator_init_icq_fn; /* see iocontext.h */ + elevator_exit_icq_fn *elevator_exit_icq_fn; /* ditto */ + elevator_set_req_fn *elevator_set_req_fn; elevator_put_req_fn *elevator_put_req_fn; @@ -63,7 +70,6 @@ struct elevator_ops elevator_init_fn *elevator_init_fn; elevator_exit_fn *elevator_exit_fn; - void (*trim)(struct io_context *); }; #define ELV_NAME_MAX (16) @@ -79,11 +85,20 @@ struct elv_fs_entry { */ struct elevator_type { - struct list_head list; + /* managed by elevator core */ + struct kmem_cache *icq_cache; + + /* fields provided by elevator implementation */ struct elevator_ops ops; + size_t icq_size; /* see iocontext.h */ + size_t icq_align; /* ditto */ struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; + + /* managed by elevator core */ + char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ + struct list_head list; }; /* @@ -91,10 +106,9 @@ struct elevator_type */ struct elevator_queue { - struct elevator_ops *ops; + struct elevator_type *type; void *elevator_data; struct kobject kobj; - struct elevator_type *elevator_type; struct mutex sysfs_lock; struct hlist_head *hash; unsigned int registered:1; @@ -129,7 +143,7 @@ extern void elv_drain_elevator(struct request_queue *); /* * io scheduler registration */ -extern void elv_register(struct elevator_type *); +extern int elv_register(struct elevator_type *); extern void elv_unregister(struct elevator_type *); /* @@ -197,22 +211,5 @@ enum { INIT_LIST_HEAD(&(rq)->csd.list); \ } while (0) -/* - * io context count accounting - */ -#define elv_ioc_count_mod(name, __val) this_cpu_add(name, __val) -#define elv_ioc_count_inc(name) this_cpu_inc(name) -#define elv_ioc_count_dec(name) this_cpu_dec(name) - -#define elv_ioc_count_read(name) \ -({ \ - unsigned long __val = 0; \ - int __cpu; \ - smp_wmb(); \ - for_each_possible_cpu(__cpu) \ - __val += per_cpu(name, __cpu); \ - __val; \ -}) - #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 4bc8169fb5a1..0244082d42c5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -319,6 +319,7 @@ struct inodes_stat_t { #define BLKPBSZGET _IO(0x12,123) #define BLKDISCARDZEROES _IO(0x12,124) #define BLKSECDISCARD _IO(0x12,125) +#define BLKROTATIONAL _IO(0x12,126) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 5037a0ad2312..7e1371c4bccf 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -3,32 +3,92 @@ #include <linux/radix-tree.h> #include <linux/rcupdate.h> +#include <linux/workqueue.h> -struct cfq_queue; -struct cfq_ttime { - unsigned long last_end_request; - - unsigned long ttime_total; - unsigned long ttime_samples; - unsigned long ttime_mean; +enum { + ICQ_IOPRIO_CHANGED, + ICQ_CGROUP_CHANGED, }; -struct cfq_io_context { - void *key; - - struct cfq_queue *cfqq[2]; - - struct io_context *ioc; - - struct cfq_ttime ttime; - - struct list_head queue_list; - struct hlist_node cic_list; - - void (*dtor)(struct io_context *); /* destructor */ - void (*exit)(struct io_context *); /* called on task exit */ +/* + * An io_cq (icq) is association between an io_context (ioc) and a + * request_queue (q). This is used by elevators which need to track + * information per ioc - q pair. + * + * Elevator can request use of icq by setting elevator_type->icq_size and + * ->icq_align. Both size and align must be larger than that of struct + * io_cq and elevator can use the tail area for private information. The + * recommended way to do this is defining a struct which contains io_cq as + * the first member followed by private members and using its size and + * align. For example, + * + * struct snail_io_cq { + * struct io_cq icq; + * int poke_snail; + * int feed_snail; + * }; + * + * struct elevator_type snail_elv_type { + * .ops = { ... }, + * .icq_size = sizeof(struct snail_io_cq), + * .icq_align = __alignof__(struct snail_io_cq), + * ... + * }; + * + * If icq_size is set, block core will manage icq's. All requests will + * have its ->elv.icq field set before elevator_ops->elevator_set_req_fn() + * is called and be holding a reference to the associated io_context. + * + * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is + * called and, on destruction, ->elevator_exit_icq_fn(). Both functions + * are called with both the associated io_context and queue locks held. + * + * Elevator is allowed to lookup icq using ioc_lookup_icq() while holding + * queue lock but the returned icq is valid only until the queue lock is + * released. Elevators can not and should not try to create or destroy + * icq's. + * + * As icq's are linked from both ioc and q, the locking rules are a bit + * complex. + * + * - ioc lock nests inside q lock. + * + * - ioc->icq_list and icq->ioc_node are protected by ioc lock. + * q->icq_list and icq->q_node by q lock. + * + * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq + * itself is protected by q lock. However, both the indexes and icq + * itself are also RCU managed and lookup can be performed holding only + * the q lock. + * + * - icq's are not reference counted. They are destroyed when either the + * ioc or q goes away. Each request with icq set holds an extra + * reference to ioc to ensure it stays until the request is completed. + * + * - Linking and unlinking icq's are performed while holding both ioc and q + * locks. Due to the lock ordering, q exit is simple but ioc exit + * requires reverse-order double lock dance. + */ +struct io_cq { + struct request_queue *q; + struct io_context *ioc; - struct rcu_head rcu_head; + /* + * q_node and ioc_node link io_cq through icq_list of q and ioc + * respectively. Both fields are unused once ioc_exit_icq() is + * called and shared with __rcu_icq_cache and __rcu_head which are + * used for RCU free of io_cq. + */ + union { + struct list_head q_node; + struct kmem_cache *__rcu_icq_cache; + }; + union { + struct hlist_node ioc_node; + struct rcu_head __rcu_head; + }; + + unsigned long changed; }; /* @@ -43,11 +103,6 @@ struct io_context { spinlock_t lock; unsigned short ioprio; - unsigned short ioprio_changed; - -#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) - unsigned short cgroup_changed; -#endif /* * For request batching @@ -55,9 +110,11 @@ struct io_context { int nr_batch_requests; /* Number of requests left in the batch */ unsigned long last_waited; /* Time last woken after wait for request */ - struct radix_tree_root radix_root; - struct hlist_head cic_list; - void __rcu *ioc_data; + struct radix_tree_root icq_tree; + struct io_cq __rcu *icq_hint; + struct hlist_head icq_list; + + struct work_struct release_work; }; static inline struct io_context *ioc_task_link(struct io_context *ioc) @@ -76,20 +133,17 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) struct task_struct; #ifdef CONFIG_BLOCK -int put_io_context(struct io_context *ioc); +void put_io_context(struct io_context *ioc, struct request_queue *locked_q); void exit_io_context(struct task_struct *task); -struct io_context *get_io_context(gfp_t gfp_flags, int node); -struct io_context *alloc_io_context(gfp_t gfp_flags, int node); +struct io_context *get_task_io_context(struct task_struct *task, + gfp_t gfp_flags, int node); +void ioc_ioprio_changed(struct io_context *ioc, int ioprio); +void ioc_cgroup_changed(struct io_context *ioc); #else -static inline void exit_io_context(struct task_struct *task) -{ -} - struct io_context; -static inline int put_io_context(struct io_context *ioc) -{ - return 1; -} +static inline void put_io_context(struct io_context *ioc, + struct request_queue *locked_q) { } +static inline void exit_io_context(struct task_struct *task) { } #endif #endif |