summary | refs | log | tree | commit | diff | stats
path: root/include
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2013-05-08 19:13:35 +0200
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-05-08 19:13:35 +0200
commit 4de13d7aa8f4d02f4dc99d4609575659f92b3c5a (patch)
tree 3bc9729eabe79c6164cd29a5d605000bc82bf837 /include
parent Merge branch 'akpm' (incoming from Andrew) (diff)
parent relay: move remove_buf_file inside relay_close_buf (diff)
download linux-4de13d7aa8f4d02f4dc99d4609575659f92b3c5a.tar.xz
linux-4de13d7aa8f4d02f4dc99d4609575659f92b3c5a.zip
Merge branch 'for-3.10/core' of git://git.kernel.dk/linux-block
Pull block core updates from Jens Axboe:

 - Major bit is Kent's prep work for immutable bio vecs.
 - Stable candidate fix for a scheduling-while-atomic in the queue bypass operation.
 - Fix for the hang on exceeded rq->datalen 32-bit unsigned when merging discard bios.
 - Tejun's changes to convert the writeback thread pool to the generic workqueue mechanism.
 - Runtime PM framework, SCSI patches exist on top of these in James' tree.
 - A few random fixes.

* 'for-3.10/core' of git://git.kernel.dk/linux-block: (40 commits)
  relay: move remove_buf_file inside relay_close_buf
  partitions/efi.c: replace useless kzalloc's by kmalloc's
  fs/block_dev.c: fix iov_shorten() criteria in blkdev_aio_read()
  block: fix max discard sectors limit
  blkcg: fix "scheduling while atomic" in blk_queue_bypass_start
  Documentation: cfq-iosched: update documentation help for cfq tunables
  writeback: expose the bdi_wq workqueue
  writeback: replace custom worker pool implementation with unbound workqueue
  writeback: remove unused bdi_pending_list
  aoe: Fix unitialized var usage
  bio-integrity: Add explicit field for owner of bip_buf
  block: Add an explicit bio flag for bios that own their bvec
  block: Add bio_alloc_pages()
  block: Convert some code to bio_for_each_segment_all()
  block: Add bio_for_each_segment_all()
  bounce: Refactor __blk_queue_bounce to not use bi_io_vec
  raid1: use bio_copy_data()
  pktcdvd: Use bio_reset() in disabled code to kill bi_idx usage
  pktcdvd: use bio_copy_data()
  block: Add bio_copy_data()
  ...
Diffstat (limited to 'include')
-rw-r--r-- include/linux/backing-dev.h 16
-rw-r--r-- include/linux/bio.h 115
-rw-r--r-- include/linux/blk_types.h 5
-rw-r--r-- include/linux/blkdev.h 29
-rw-r--r-- include/trace/events/block.h 12
-rw-r--r-- include/trace/events/writeback.h 5
6 files changed, 115 insertions, 67 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 350459910fe1..c3881553f7d1 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -18,6 +18,7 @@
#include <linux/writeback.h>
#include <linux/atomic.h>
#include <linux/sysctl.h>
+#include <linux/workqueue.h>
struct page;
struct device;
@@ -27,7 +28,6 @@ struct dentry;
* Bits in backing_dev_info.state
*/
enum bdi_state {
- BDI_pending, /* On its way to being activated */
BDI_wb_alloc, /* Default embedded wb allocated */
BDI_async_congested, /* The async (write) queue is getting full */
BDI_sync_congested, /* The sync queue is getting full */
@@ -53,10 +53,8 @@ struct bdi_writeback {
unsigned int nr;
unsigned long last_old_flush; /* last old data flush */
- unsigned long last_active; /* last time bdi thread was active */
- struct task_struct *task; /* writeback thread */
- struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */
+ struct delayed_work dwork; /* work item used for writeback */
struct list_head b_dirty; /* dirty inodes */
struct list_head b_io; /* parked for writeback */
struct list_head b_more_io; /* parked for more writeback */
@@ -123,14 +121,15 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
enum wb_reason reason);
void bdi_start_background_writeback(struct backing_dev_info *bdi);
-int bdi_writeback_thread(void *data);
+void bdi_writeback_workfn(struct work_struct *work);
int bdi_has_dirty_io(struct backing_dev_info *bdi);
void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2);
extern spinlock_t bdi_lock;
extern struct list_head bdi_list;
-extern struct list_head bdi_pending_list;
+
+extern struct workqueue_struct *bdi_wq;
static inline int wb_has_dirty_io(struct bdi_writeback *wb)
{
@@ -336,11 +335,6 @@ static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
return bdi->capabilities & BDI_CAP_SWAP_BACKED;
}
-static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
-{
- return bdi == &default_backing_dev_info;
-}
-
static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
return bdi_cap_writeback_dirty(mapping->backing_dev_info);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 820e7aaad4fd..ef24466d8f82 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -67,6 +67,7 @@
#define bio_offset(bio) bio_iovec((bio))->bv_offset
#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio) ((bio)->bi_size >> 9)
+#define bio_end_sector(bio) ((bio)->bi_sector + bio_sectors((bio)))
static inline unsigned int bio_cur_bytes(struct bio *bio)
{
@@ -84,11 +85,6 @@ static inline void *bio_data(struct bio *bio)
return NULL;
}
-static inline int bio_has_allocated_vec(struct bio *bio)
-{
- return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs;
-}
-
/*
* will die
*/
@@ -136,16 +132,27 @@ static inline int bio_has_allocated_vec(struct bio *bio)
#define bio_io_error(bio) bio_endio((bio), -EIO)
/*
- * drivers should not use the __ version unless they _really_ want to
- * run through the entire bio and not just pending pieces
+ * drivers should not use the __ version unless they _really_ know what
+ * they're doing
*/
#define __bio_for_each_segment(bvl, bio, i, start_idx) \
for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \
i < (bio)->bi_vcnt; \
bvl++, i++)
+/*
+ * drivers should _never_ use the all version - the bio may have been split
+ * before it got to the driver and the driver won't own all of it
+ */
+#define bio_for_each_segment_all(bvl, bio, i) \
+ for (i = 0; \
+ bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \
+ i++)
+
#define bio_for_each_segment(bvl, bio, i) \
- __bio_for_each_segment(bvl, bio, i, (bio)->bi_idx)
+ for (i = (bio)->bi_idx; \
+ bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \
+ i++)
/*
* get a reference to a bio, so it won't disappear. the intended use is
@@ -180,9 +187,12 @@ struct bio_integrity_payload {
unsigned short bip_slab; /* slab the bip came from */
unsigned short bip_vcnt; /* # of integrity bio_vecs */
unsigned short bip_idx; /* current bip_vec index */
+ unsigned bip_owns_buf:1; /* should free bip_buf */
struct work_struct bip_work; /* I/O completion */
- struct bio_vec bip_vec[0]; /* embedded bvec array */
+
+ struct bio_vec *bip_vec;
+ struct bio_vec bip_inline_vecs[0];/* embedded bvec array */
};
#endif /* CONFIG_BLK_DEV_INTEGRITY */
@@ -211,6 +221,7 @@ extern void bio_pair_release(struct bio_pair *dbio);
extern struct bio_set *bioset_create(unsigned int, unsigned int);
extern void bioset_free(struct bio_set *);
+extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries);
extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
extern void bio_put(struct bio *);
@@ -245,6 +256,9 @@ extern void bio_endio(struct bio *, int);
struct request_queue;
extern int bio_phys_segments(struct request_queue *, struct bio *);
+extern int submit_bio_wait(int rw, struct bio *bio);
+extern void bio_advance(struct bio *, unsigned);
+
extern void bio_init(struct bio *);
extern void bio_reset(struct bio *);
@@ -279,6 +293,9 @@ static inline void bio_flush_dcache_pages(struct bio *bi)
}
#endif
+extern void bio_copy_data(struct bio *dst, struct bio *src);
+extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
+
extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
unsigned long, unsigned int, int, gfp_t);
extern struct bio *bio_copy_user_iov(struct request_queue *,
@@ -286,8 +303,8 @@ extern struct bio *bio_copy_user_iov(struct request_queue *,
int, int, gfp_t);
extern int bio_uncopy_user(struct bio *);
void zero_fill_bio(struct bio *bio);
-extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
-extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
+extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
+extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
extern unsigned int bvec_nr_vecs(unsigned short idx);
#ifdef CONFIG_BLK_CGROUP
@@ -298,39 +315,6 @@ static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
static inline void bio_disassociate_task(struct bio *bio) { }
#endif /* CONFIG_BLK_CGROUP */
-/*
- * bio_set is used to allow other portions of the IO system to
- * allocate their own private memory pools for bio and iovec structures.
- * These memory pools in turn all allocate from the bio_slab
- * and the bvec_slabs[].
- */
-#define BIO_POOL_SIZE 2
-#define BIOVEC_NR_POOLS 6
-#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
-
-struct bio_set {
- struct kmem_cache *bio_slab;
- unsigned int front_pad;
-
- mempool_t *bio_pool;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
- mempool_t *bio_integrity_pool;
-#endif
- mempool_t *bvec_pool;
-};
-
-struct biovec_slab {
- int nr_vecs;
- char *name;
- struct kmem_cache *slab;
-};
-
-/*
- * a small number of entries is fine, not going to be performance critical.
- * basically we just need to survive
- */
-#define BIO_SPLIT_ENTRIES 2
-
#ifdef CONFIG_HIGHMEM
/*
* remember never ever reenable interrupts between a bvec_kmap_irq and
@@ -527,6 +511,49 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
return bio;
}
+/*
+ * bio_set is used to allow other portions of the IO system to
+ * allocate their own private memory pools for bio and iovec structures.
+ * These memory pools in turn all allocate from the bio_slab
+ * and the bvec_slabs[].
+ */
+#define BIO_POOL_SIZE 2
+#define BIOVEC_NR_POOLS 6
+#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
+
+struct bio_set {
+ struct kmem_cache *bio_slab;
+ unsigned int front_pad;
+
+ mempool_t *bio_pool;
+ mempool_t *bvec_pool;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ mempool_t *bio_integrity_pool;
+ mempool_t *bvec_integrity_pool;
+#endif
+
+ /*
+ * Deadlock avoidance for stacking block drivers: see comments in
+ * bio_alloc_bioset() for details
+ */
+ spinlock_t rescue_lock;
+ struct bio_list rescue_list;
+ struct work_struct rescue_work;
+ struct workqueue_struct *rescue_workqueue;
+};
+
+struct biovec_slab {
+ int nr_vecs;
+ char *name;
+ struct kmem_cache *slab;
+};
+
+/*
+ * a small number of entries is fine, not going to be performance critical.
+ * basically we just need to survive
+ */
+#define BIO_SPLIT_ENTRIES 2
+
#if defined(CONFIG_BLK_DEV_INTEGRITY)
#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 22990cf4439d..fa1abeb45b76 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -118,6 +118,7 @@ struct bio {
* BIO_POOL_IDX()
*/
#define BIO_RESET_BITS 13
+#define BIO_OWNS_VEC 13 /* bio_free() should free bvec */
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
@@ -176,6 +177,7 @@ enum rq_flag_bits {
__REQ_IO_STAT, /* account I/O stat */
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
__REQ_KERNEL, /* direct IO to kernel pages */
+ __REQ_PM, /* runtime pm request */
__REQ_NR_BITS, /* stops here */
};
@@ -198,6 +200,8 @@ enum rq_flag_bits {
REQ_SECURE)
#define REQ_CLONE_MASK REQ_COMMON_MASK
+#define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME)
+
/* This mask is used for both bio and request merge checking */
#define REQ_NOMERGE_FLAGS \
(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
@@ -224,5 +228,6 @@ enum rq_flag_bits {
#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
#define REQ_SECURE (1 << __REQ_SECURE)
#define REQ_KERNEL (1 << __REQ_KERNEL)
+#define REQ_PM (1 << __REQ_PM)
#endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e38cfe77f7f0..2fdb4a451b49 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -361,6 +361,12 @@ struct request_queue {
*/
struct kobject kobj;
+#ifdef CONFIG_PM_RUNTIME
+ struct device *dev;
+ int rpm_status;
+ unsigned int nr_pending;
+#endif
+
/*
* queue settings
*/
@@ -838,7 +844,7 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
unsigned int cmd_flags)
{
if (unlikely(cmd_flags & REQ_DISCARD))
- return q->limits.max_discard_sectors;
+ return min(q->limits.max_discard_sectors, UINT_MAX >> 9);
if (unlikely(cmd_flags & REQ_WRITE_SAME))
return q->limits.max_write_same_sectors;
@@ -961,6 +967,27 @@ struct request_queue *blk_alloc_queue_node(gfp_t, int);
extern void blk_put_queue(struct request_queue *);
/*
+ * block layer runtime pm functions
+ */
+#ifdef CONFIG_PM_RUNTIME
+extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev);
+extern int blk_pre_runtime_suspend(struct request_queue *q);
+extern void blk_post_runtime_suspend(struct request_queue *q, int err);
+extern void blk_pre_runtime_resume(struct request_queue *q);
+extern void blk_post_runtime_resume(struct request_queue *q, int err);
+#else
+static inline void blk_pm_runtime_init(struct request_queue *q,
+ struct device *dev) {}
+static inline int blk_pre_runtime_suspend(struct request_queue *q)
+{
+ return -ENOSYS;
+}
+static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {}
+static inline void blk_pre_runtime_resume(struct request_queue *q) {}
+static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
+#endif
+
+/*
* blk_plug permits building a queue of related requests by holding the I/O
* fragments for a short period. This allows merging of sequential requests
* into single larger request. As the requests are moved from a per-task list to
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 9c1467357b03..60ae7c3db912 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -244,7 +244,7 @@ TRACE_EVENT(block_bio_bounce,
__entry->dev = bio->bi_bdev ?
bio->bi_bdev->bd_dev : 0;
__entry->sector = bio->bi_sector;
- __entry->nr_sector = bio->bi_size >> 9;
+ __entry->nr_sector = bio_sectors(bio);
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -281,7 +281,7 @@ TRACE_EVENT(block_bio_complete,
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
- __entry->nr_sector = bio->bi_size >> 9;
+ __entry->nr_sector = bio_sectors(bio);
__entry->error = error;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
),
@@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(block_bio_merge,
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
- __entry->nr_sector = bio->bi_size >> 9;
+ __entry->nr_sector = bio_sectors(bio);
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -376,7 +376,7 @@ TRACE_EVENT(block_bio_queue,
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
- __entry->nr_sector = bio->bi_size >> 9;
+ __entry->nr_sector = bio_sectors(bio);
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -404,7 +404,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
TP_fast_assign(
__entry->dev = bio ? bio->bi_bdev->bd_dev : 0;
__entry->sector = bio ? bio->bi_sector : 0;
- __entry->nr_sector = bio ? bio->bi_size >> 9 : 0;
+ __entry->nr_sector = bio ? bio_sectors(bio) : 0;
blk_fill_rwbs(__entry->rwbs,
bio ? bio->bi_rw : 0, __entry->nr_sector);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
@@ -580,7 +580,7 @@ TRACE_EVENT(block_bio_remap,
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
- __entry->nr_sector = bio->bi_size >> 9;
+ __entry->nr_sector = bio_sectors(bio);
__entry->old_dev = dev;
__entry->old_sector = from;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 6a16fd2e70ed..464ea82e10db 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -183,7 +183,6 @@ DECLARE_EVENT_CLASS(writeback_work_class,
DEFINE_EVENT(writeback_work_class, name, \
TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
TP_ARGS(bdi, work))
-DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread);
DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
DEFINE_WRITEBACK_WORK_EVENT(writeback_start);
@@ -222,12 +221,8 @@ DEFINE_EVENT(writeback_class, name, \
DEFINE_WRITEBACK_EVENT(writeback_nowork);
DEFINE_WRITEBACK_EVENT(writeback_wake_background);
-DEFINE_WRITEBACK_EVENT(writeback_wake_thread);
-DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread);
DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
-DEFINE_WRITEBACK_EVENT(writeback_thread_start);
-DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
DECLARE_EVENT_CLASS(wbc_class,
TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),