diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-cgroup.c | 8 | ||||
-rw-r--r-- | block/blk-core.c | 67 | ||||
-rw-r--r-- | block/blk-lib.c | 104 | ||||
-rw-r--r-- | block/blk-merge.c | 53 | ||||
-rw-r--r-- | block/blk-settings.c | 16 | ||||
-rw-r--r-- | block/blk-sysfs.c | 44 | ||||
-rw-r--r-- | block/blk-tag.c | 6 | ||||
-rw-r--r-- | block/blk-throttle.c | 14 | ||||
-rw-r--r-- | block/blk.h | 5 | ||||
-rw-r--r-- | block/elevator.c | 6 | ||||
-rw-r--r-- | block/genhd.c | 14 | ||||
-rw-r--r-- | block/ioctl.c | 29 | ||||
-rw-r--r-- | block/partitions/ibm.c | 455 |
13 files changed, 535 insertions, 286 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f3b44a65fc7a..cafcd7431189 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -737,6 +737,14 @@ struct cgroup_subsys blkio_subsys = { .subsys_id = blkio_subsys_id, .base_cftypes = blkcg_files, .module = THIS_MODULE, + + /* + * blkio subsystem is utterly broken in terms of hierarchy support. + * It treats all cgroups equally regardless of where they're + * located in the hierarchy - all cgroups are treated as if they're + * right below the root. Fix it and remove the following. + */ + .broken_hierarchy = true, }; EXPORT_SYMBOL_GPL(blkio_subsys); diff --git a/block/blk-core.c b/block/blk-core.c index 4b4dbdfbca89..a33870b1847b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -262,7 +262,7 @@ EXPORT_SYMBOL(blk_start_queue); **/ void blk_stop_queue(struct request_queue *q) { - __cancel_delayed_work(&q->delay_work); + cancel_delayed_work(&q->delay_work); queue_flag_set(QUEUE_FLAG_STOPPED, q); } EXPORT_SYMBOL(blk_stop_queue); @@ -319,10 +319,8 @@ EXPORT_SYMBOL(__blk_run_queue); */ void blk_run_queue_async(struct request_queue *q) { - if (likely(!blk_queue_stopped(q))) { - __cancel_delayed_work(&q->delay_work); - queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); - } + if (likely(!blk_queue_stopped(q))) + mod_delayed_work(kblockd_workqueue, &q->delay_work, 0); } EXPORT_SYMBOL(blk_run_queue_async); @@ -608,8 +606,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) /* * A queue starts its life with bypass turned on to avoid * unnecessary bypass on/off overhead and nasty surprises during - * init. The initial bypass will be finished at the end of - * blk_init_allocated_queue(). + * init. The initial bypass will be finished when the queue is + * registered by blk_register_queue(). */ q->bypass_depth = 1; __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); @@ -696,7 +694,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, q->request_fn = rfn; q->prep_rq_fn = NULL; q->unprep_rq_fn = NULL; - q->queue_flags = QUEUE_FLAG_DEFAULT; + q->queue_flags |= QUEUE_FLAG_DEFAULT; /* Override internal queue lock with supplied lock pointer */ if (lock) @@ -712,11 +710,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, /* init elevator */ if (elevator_init(q, NULL)) return NULL; - - blk_queue_congestion_threshold(q); - - /* all done, end the initial bypass */ - blk_queue_bypass_end(q); return q; } EXPORT_SYMBOL(blk_init_allocated_queue); @@ -1659,8 +1652,8 @@ generic_make_request_checks(struct bio *bio) goto end_io; } - if (unlikely(!(bio->bi_rw & REQ_DISCARD) && - nr_sectors > queue_max_hw_sectors(q))) { + if (likely(bio_is_rw(bio) && + nr_sectors > queue_max_hw_sectors(q))) { printk(KERN_ERR "bio too big device %s (%u > %u)\n", bdevname(bio->bi_bdev, b), bio_sectors(bio), @@ -1701,8 +1694,12 @@ generic_make_request_checks(struct bio *bio) if ((bio->bi_rw & REQ_DISCARD) && (!blk_queue_discard(q) || - ((bio->bi_rw & REQ_SECURE) && - !blk_queue_secdiscard(q)))) { + ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) { + err = -EOPNOTSUPP; + goto end_io; + } + + if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { err = -EOPNOTSUPP; goto end_io; } @@ -1812,15 +1809,20 @@ EXPORT_SYMBOL(generic_make_request); */ void submit_bio(int rw, struct bio *bio) { - int count = bio_sectors(bio); - bio->bi_rw |= rw; /* * If it's a regular read/write or a barrier with data attached, * go through the normal accounting stuff before submission. */ - if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { + if (bio_has_data(bio)) { + unsigned int count; + + if (unlikely(rw & REQ_WRITE_SAME)) + count = bdev_logical_block_size(bio->bi_bdev) >> 9; + else + count = bio_sectors(bio); + if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { @@ -1866,11 +1868,10 @@ EXPORT_SYMBOL(submit_bio); */ int blk_rq_check_limits(struct request_queue *q, struct request *rq) { - if (rq->cmd_flags & REQ_DISCARD) + if (!rq_mergeable(rq)) return 0; - if (blk_rq_sectors(rq) > queue_max_sectors(q) || - blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { + if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) { printk(KERN_ERR "%s: over max size limit.\n", __func__); return -EIO; } @@ -2254,9 +2255,11 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) error_type = "I/O"; break; } - printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n", - error_type, req->rq_disk ? req->rq_disk->disk_name : "?", - (unsigned long long)blk_rq_pos(req)); + printk_ratelimited(KERN_ERR "end_request: %s error, dev %s, sector %llu\n", + error_type, req->rq_disk ? + req->rq_disk->disk_name : "?", + (unsigned long long)blk_rq_pos(req)); + } blk_account_io_completion(req, nr_bytes); @@ -2340,7 +2343,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) req->buffer = bio_data(req->bio); /* update sector only for requests with clear definition of sector */ - if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) + if (req->cmd_type == REQ_TYPE_FS) req->__sector += total_bytes >> 9; /* mixed attributes always follow the first bio */ @@ -2781,16 +2784,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, blk_rq_init(NULL, rq); __rq_for_each_bio(bio_src, rq_src) { - bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); + bio = bio_clone_bioset(bio_src, gfp_mask, bs); if (!bio) goto free_and_out; - __bio_clone(bio, bio_src); - - if (bio_integrity(bio_src) && - bio_integrity_clone(bio, bio_src, gfp_mask, bs)) - goto free_and_out; - if (bio_ctr && bio_ctr(bio, bio_src, data)) goto free_and_out; @@ -2807,7 +2804,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, free_and_out: if (bio) - bio_free(bio, bs); + bio_put(bio); blk_rq_unprep_clone(rq); return -ENOMEM; diff --git a/block/blk-lib.c b/block/blk-lib.c index 19cc761cacb2..9373b58dfab1 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -130,6 +130,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, EXPORT_SYMBOL(blkdev_issue_discard); /** + * blkdev_issue_write_same - queue a write same operation + * @bdev: target blockdev + * @sector: start sector + * @nr_sects: number of sectors to write + * @gfp_mask: memory allocation flags (for bio_alloc) + * @page: page containing data to write + * + * Description: + * Issue a write same request for the sectors in question. + */ +int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, + struct page *page) +{ + DECLARE_COMPLETION_ONSTACK(wait); + struct request_queue *q = bdev_get_queue(bdev); + unsigned int max_write_same_sectors; + struct bio_batch bb; + struct bio *bio; + int ret = 0; + + if (!q) + return -ENXIO; + + max_write_same_sectors = q->limits.max_write_same_sectors; + + if (max_write_same_sectors == 0) + return -EOPNOTSUPP; + + atomic_set(&bb.done, 1); + bb.flags = 1 << BIO_UPTODATE; + bb.wait = &wait; + + while (nr_sects) { + bio = bio_alloc(gfp_mask, 1); + if (!bio) { + ret = -ENOMEM; + break; + } + + bio->bi_sector = sector; + bio->bi_end_io = bio_batch_end_io; + bio->bi_bdev = bdev; + bio->bi_private = &bb; + bio->bi_vcnt = 1; + bio->bi_io_vec->bv_page = page; + bio->bi_io_vec->bv_offset = 0; + bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); + + if (nr_sects > max_write_same_sectors) { + bio->bi_size = max_write_same_sectors << 9; + nr_sects -= max_write_same_sectors; + sector += max_write_same_sectors; + } else { + bio->bi_size = nr_sects << 9; + nr_sects = 0; + } + + atomic_inc(&bb.done); + submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio); + } + + /* Wait for bios in-flight */ + if (!atomic_dec_and_test(&bb.done)) + wait_for_completion(&wait); + + if (!test_bit(BIO_UPTODATE, &bb.flags)) + ret = -ENOTSUPP; + + return ret; +} +EXPORT_SYMBOL(blkdev_issue_write_same); + +/** * blkdev_issue_zeroout - generate number of zero filed write bios * @bdev: blockdev to issue * @sector: start sector @@ -140,7 +214,7 @@ EXPORT_SYMBOL(blkdev_issue_discard); * Generate and issue number of bios with zerofiled pages. */ -int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, +int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask) { int ret; @@ -190,4 +264,32 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, return ret; } + +/** + * blkdev_issue_zeroout - zero-fill a block range + * @bdev: blockdev to write + * @sector: start sector + * @nr_sects: number of sectors to write + * @gfp_mask: memory allocation flags (for bio_alloc) + * + * Description: + * Generate and issue number of bios with zerofiled pages. + */ + +int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask) +{ + if (bdev_write_same(bdev)) { + unsigned char bdn[BDEVNAME_SIZE]; + + if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, + ZERO_PAGE(0))) + return 0; + + bdevname(bdev, bdn); + pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn); + } + + return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask); +} EXPORT_SYMBOL(blkdev_issue_zeroout); diff --git a/block/blk-merge.c b/block/blk-merge.c index e76279e41162..936a110de0b9 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -275,14 +275,8 @@ no_merge: int ll_back_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { - unsigned short max_sectors; - - if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC)) - max_sectors = queue_max_hw_sectors(q); - else - max_sectors = queue_max_sectors(q); - - if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { + if (blk_rq_sectors(req) + bio_sectors(bio) > + blk_rq_get_max_sectors(req)) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -299,15 +293,8 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, int ll_front_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { - unsigned short max_sectors; - - if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC)) - max_sectors = queue_max_hw_sectors(q); - else - max_sectors = queue_max_sectors(q); - - - if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { + if (blk_rq_sectors(req) + bio_sectors(bio) > + blk_rq_get_max_sectors(req)) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -338,7 +325,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, /* * Will it become too large? */ - if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q)) + if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > + blk_rq_get_max_sectors(req)) return 0; total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; @@ -417,16 +405,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (!rq_mergeable(req) || !rq_mergeable(next)) return 0; - /* - * Don't merge file system requests and discard requests - */ - if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD)) - return 0; - - /* - * Don't merge discard requests and secure discard requests - */ - if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE)) + if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags)) return 0; /* @@ -440,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req, || next->special) return 0; + if (req->cmd_flags & REQ_WRITE_SAME && + !blk_write_same_mergeable(req->bio, next->bio)) + return 0; + /* * If we are allowed to merge, then append bio list * from next to rq and release next. merge_requests_fn @@ -521,15 +504,10 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, bool blk_rq_merge_ok(struct request *rq, struct bio *bio) { - if (!rq_mergeable(rq)) + if (!rq_mergeable(rq) || !bio_mergeable(bio)) return false; - /* don't merge file system requests and discard requests */ - if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD)) - return false; - - /* don't merge discard requests and secure discard requests */ - if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE)) + if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw)) return false; /* different data direction or already started, don't merge */ @@ -544,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (bio_integrity(bio) != blk_integrity_rq(rq)) return false; + /* must be using the same buffer */ + if (rq->cmd_flags & REQ_WRITE_SAME && + !blk_write_same_mergeable(rq->bio, bio)) + return false; + return true; } diff --git a/block/blk-settings.c b/block/blk-settings.c index 565a6786032f..779bb7646bcd 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; + lim->max_write_same_sectors = 0; lim->max_discard_sectors = 0; lim->discard_granularity = 0; lim->discard_alignment = 0; @@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) lim->max_segments = USHRT_MAX; lim->max_hw_sectors = UINT_MAX; lim->max_sectors = UINT_MAX; + lim->max_write_same_sectors = UINT_MAX; } EXPORT_SYMBOL(blk_set_stacking_limits); @@ -286,6 +288,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q, EXPORT_SYMBOL(blk_queue_max_discard_sectors); /** + * blk_queue_max_write_same_sectors - set max sectors for a single write same + * @q: the request queue for the device + * @max_write_same_sectors: maximum number of sectors to write per command + **/ +void blk_queue_max_write_same_sectors(struct request_queue *q, + unsigned int max_write_same_sectors) +{ + q->limits.max_write_same_sectors = max_write_same_sectors; +} +EXPORT_SYMBOL(blk_queue_max_write_same_sectors); + +/** * blk_queue_max_segments - set max hw segments for a request for this queue * @q: the request queue for the device * @max_segments: max number of segments @@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); + t->max_write_same_sectors = min(t->max_write_same_sectors, + b->max_write_same_sectors); t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9628b291f960..ce6204608822 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -26,9 +26,15 @@ queue_var_show(unsigned long var, char *page) static ssize_t queue_var_store(unsigned long *var, const char *page, size_t count) { - char *p = (char *) page; + int err; + unsigned long v; + + err = strict_strtoul(page, 10, &v); + if (err || v > UINT_MAX) + return -EINVAL; + + *var = v; - *var = simple_strtoul(p, &p, 10); return count; } @@ -48,6 +54,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) return -EINVAL; ret = queue_var_store(&nr, page, count); + if (ret < 0) + return ret; + if (nr < BLKDEV_MIN_RQ) nr = BLKDEV_MIN_RQ; @@ -102,6 +111,9 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) unsigned long ra_kb; ssize_t ret = queue_var_store(&ra_kb, page, count); + if (ret < 0) + return ret; + q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); return ret; @@ -168,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag return queue_var_show(queue_discard_zeroes_data(q), page); } +static ssize_t queue_write_same_max_show(struct request_queue *q, char *page) +{ + return sprintf(page, "%llu\n", + (unsigned long long)q->limits.max_write_same_sectors << 9); +} + + static ssize_t queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) { @@ -176,6 +195,9 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) page_kb = 1 << (PAGE_CACHE_SHIFT - 10); ssize_t ret = queue_var_store(&max_sectors_kb, page, count); + if (ret < 0) + return ret; + if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) return -EINVAL; @@ -236,6 +258,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, unsigned long nm; ssize_t ret = queue_var_store(&nm, page, count); + if (ret < 0) + return ret; + spin_lock_irq(q->queue_lock); queue_flag_clear(QUEUE_FLAG_NOMERGES, q); queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); @@ -264,6 +289,9 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) unsigned long val; ret = queue_var_store(&val, page, count); + if (ret < 0) + return ret; + spin_lock_irq(q->queue_lock); if (val == 2) { queue_flag_set(QUEUE_FLAG_SAME_COMP, q); @@ -364,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { .show = queue_discard_zeroes_data_show, }; +static struct queue_sysfs_entry queue_write_same_max_entry = { + .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO }, + .show = queue_write_same_max_show, +}; + static struct queue_sysfs_entry queue_nonrot_entry = { .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, .show = queue_show_nonrot, @@ -411,6 +444,7 @@ static struct attribute *default_attrs[] = { &queue_discard_granularity_entry.attr, &queue_discard_max_entry.attr, &queue_discard_zeroes_data_entry.attr, + &queue_write_same_max_entry.attr, &queue_nonrot_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, @@ -527,6 +561,12 @@ int blk_register_queue(struct gendisk *disk) if (WARN_ON(!q)) return -ENXIO; + /* + * Initialization must be complete by now. Finish the initial + * bypass from queue allocation. + */ + blk_queue_bypass_end(q); + ret = blk_trace_init_sysfs(dev); if (ret) return ret; diff --git a/block/blk-tag.c b/block/blk-tag.c index 4af6f5cc1167..cc345e1d8d4e 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -186,7 +186,8 @@ int blk_queue_init_tags(struct request_queue *q, int depth, tags = __blk_queue_init_tags(q, depth); if (!tags) - goto fail; + return -ENOMEM; + } else if (q->queue_tags) { rc = blk_queue_resize_tags(q, depth); if (rc) @@ -203,9 +204,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth, queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q); INIT_LIST_HEAD(&q->tag_busy_list); return 0; -fail: - kfree(tags); - return -ENOMEM; } EXPORT_SYMBOL(blk_queue_init_tags); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e287c19908c8..a9664fa0b609 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -180,7 +180,7 @@ static inline unsigned int total_nr_queued(struct throtl_data *td) /* * Worker for allocating per cpu stat for tgs. This is scheduled on the - * system_nrt_wq once there are some groups on the alloc_list waiting for + * system_wq once there are some groups on the alloc_list waiting for * allocation. */ static void tg_stats_alloc_fn(struct work_struct *work) @@ -194,8 +194,7 @@ alloc_stats: stats_cpu = alloc_percpu(struct tg_stats_cpu); if (!stats_cpu) { /* allocation failed, try again after some time */ - queue_delayed_work(system_nrt_wq, dwork, - msecs_to_jiffies(10)); + schedule_delayed_work(dwork, msecs_to_jiffies(10)); return; } } @@ -238,7 +237,7 @@ static void throtl_pd_init(struct blkcg_gq *blkg) */ spin_lock_irqsave(&tg_stats_alloc_lock, flags); list_add(&tg->stats_alloc_node, &tg_stats_alloc_list); - queue_delayed_work(system_nrt_wq, &tg_stats_alloc_work, 0); + schedule_delayed_work(&tg_stats_alloc_work, 0); spin_unlock_irqrestore(&tg_stats_alloc_lock, flags); } @@ -930,12 +929,7 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) /* schedule work if limits changed even if no bio is queued */ if (total_nr_queued(td) || td->limits_changed) { - /* - * We might have a work scheduled to be executed in future. - * Cancel that and schedule a new one. - */ - __cancel_delayed_work(dwork); - queue_delayed_work(kthrotld_workqueue, dwork, delay); + mod_delayed_work(kthrotld_workqueue, dwork, delay); throtl_log(td, "schedule work. delay=%lu jiffies=%lu", delay, jiffies); } diff --git a/block/blk.h b/block/blk.h index 2a0ea32d249f..ca51543b248c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -171,14 +171,13 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) * * a) it's attached to a gendisk, and * b) the queue had IO stats enabled when this request was started, and - * c) it's a file system request or a discard request + * c) it's a file system request */ static inline int blk_do_io_stat(struct request *rq) { return rq->rq_disk && (rq->cmd_flags & REQ_IO_STAT) && - (rq->cmd_type == REQ_TYPE_FS || - (rq->cmd_flags & REQ_DISCARD)); + (rq->cmd_type == REQ_TYPE_FS); } /* diff --git a/block/elevator.c b/block/elevator.c index 6a55d418896f..9b1d42b62f20 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -562,8 +562,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) if (rq->cmd_flags & REQ_SOFTBARRIER) { /* barriers are scheduling boundary, update end_sector */ - if (rq->cmd_type == REQ_TYPE_FS || - (rq->cmd_flags & REQ_DISCARD)) { + if (rq->cmd_type == REQ_TYPE_FS) { q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } @@ -605,8 +604,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) if (elv_attempt_insert_merge(q, rq)) break; case ELEVATOR_INSERT_SORT: - BUG_ON(rq->cmd_type != REQ_TYPE_FS && - !(rq->cmd_flags & REQ_DISCARD)); + BUG_ON(rq->cmd_type != REQ_TYPE_FS); rq->cmd_flags |= REQ_SORTED; q->nr_sorted++; if (rq_mergeable(rq)) { diff --git a/block/genhd.c b/block/genhd.c index d839723303c8..6cace663a80e 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1490,9 +1490,9 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now) intv = disk_events_poll_jiffies(disk); set_timer_slack(&ev->dwork.timer, intv / 4); if (check_now) - queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); + queue_delayed_work(system_freezable_wq, &ev->dwork, 0); else if (intv) - queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv); + queue_delayed_work(system_freezable_wq, &ev->dwork, intv); out_unlock: spin_unlock_irqrestore(&ev->lock, flags); } @@ -1534,10 +1534,8 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask) spin_lock_irq(&ev->lock); ev->clearing |= mask; - if (!ev->block) { - cancel_delayed_work(&ev->dwork); - queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); - } + if (!ev->block) + mod_delayed_work(system_freezable_wq, &ev->dwork, 0); spin_unlock_irq(&ev->lock); } @@ -1573,7 +1571,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) /* uncondtionally schedule event check and wait for it to finish */ disk_block_events(disk); - queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); + queue_delayed_work(system_freezable_wq, &ev->dwork, 0); flush_delayed_work(&ev->dwork); __disk_unblock_events(disk, false); @@ -1610,7 +1608,7 @@ static void disk_events_workfn(struct work_struct *work) intv = disk_events_poll_jiffies(disk); if (!ev->block && intv) - queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv); + queue_delayed_work(system_freezable_wq, &ev->dwork, intv); spin_unlock_irq(&ev->lock); diff --git a/block/ioctl.c b/block/ioctl.c index 4476e0e85d16..a31d91d9bc5a 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -41,7 +41,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user sizeof(long long) > sizeof(long)) { long pstart = start, plength = length; if (pstart != start || plength != length - || pstart < 0 || plength < 0) + || pstart < 0 || plength < 0 || partno > 65535) return -EINVAL; } @@ -185,6 +185,22 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); } +static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start, + uint64_t len) +{ + if (start & 511) + return -EINVAL; + if (len & 511) + return -EINVAL; + start >>= 9; + len >>= 9; + + if (start + len > (i_size_read(bdev->bd_inode) >> 9)) + return -EINVAL; + + return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL); +} + static int put_ushort(unsigned long arg, unsigned short val) { return put_user(val, (unsigned short __user *)arg); @@ -300,6 +316,17 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, return blk_ioctl_discard(bdev, range[0], range[1], cmd == BLKSECDISCARD); } + case BLKZEROOUT: { + uint64_t range[2]; + + if (!(mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(range, (void __user *)arg, sizeof(range))) + return -EFAULT; + + return blk_ioctl_zeroout(bdev, range[0], range[1]); + } case HDIO_GETGEO: { struct hd_geometry geo; diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c index 1104acac780b..47a61474e795 100644 --- a/block/partitions/ibm.c +++ b/block/partitions/ibm.c @@ -1,9 +1,8 @@ /* - * File...........: linux/fs/partitions/ibm.c * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> * Volker Sameske <sameske@de.ibm.com> * Bugreports.to..: <Linux390@de.ibm.com> - * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 + * Copyright IBM Corp. 1999, 2012 */ #include <linux/buffer_head.h> @@ -17,17 +16,23 @@ #include "check.h" #include "ibm.h" + +union label_t { + struct vtoc_volume_label_cdl vol; + struct vtoc_volume_label_ldl lnx; + struct vtoc_cms_label cms; +}; + /* * compute the block number from a * cyl-cyl-head-head structure */ -static sector_t -cchh2blk (struct vtoc_cchh *ptr, struct hd_geometry *geo) { - +static sector_t cchh2blk(struct vtoc_cchh *ptr, struct hd_geometry *geo) +{ sector_t cyl; __u16 head; - /*decode cylinder and heads for large volumes */ + /* decode cylinder and heads for large volumes */ cyl = ptr->hh & 0xFFF0; cyl <<= 12; cyl |= ptr->cc; @@ -40,13 +45,12 @@ cchh2blk (struct vtoc_cchh *ptr, struct hd_geometry *geo) { * compute the block number from a * cyl-cyl-head-head-block structure */ -static sector_t -cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) { - +static sector_t cchhb2blk(struct vtoc_cchhb *ptr, struct hd_geometry *geo) +{ sector_t cyl; __u16 head; - /*decode cylinder and heads for large volumes */ + /* decode cylinder and heads for large volumes */ cyl = ptr->hh & 0xFFF0; cyl <<= 12; cyl |= ptr->cc; @@ -56,26 +60,243 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) { ptr->b; } +static int find_label(struct parsed_partitions *state, + dasd_information2_t *info, + struct hd_geometry *geo, + int blocksize, + sector_t *labelsect, + char name[], + char type[], + union label_t *label) +{ + Sector sect; + unsigned char *data; + sector_t testsect[3]; + unsigned char temp[5]; + int found = 0; + int i, testcount; + + /* There a three places where we may find a valid label: + * - on an ECKD disk it's block 2 + * - on an FBA disk it's block 1 + * - on an CMS formatted FBA disk it is sector 1, even if the block size + * is larger than 512 bytes (possible if the DIAG discipline is used) + * If we have a valid info structure, then we know exactly which case we + * have, otherwise we just search through all possebilities. + */ + if (info) { + if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) || + (info->cu_type == 0x3880 && info->dev_type == 0x3370)) + testsect[0] = info->label_block; + else + testsect[0] = info->label_block * (blocksize >> 9); + testcount = 1; + } else { + testsect[0] = 1; + testsect[1] = (blocksize >> 9); + testsect[2] = 2 * (blocksize >> 9); + testcount = 3; + } + for (i = 0; i < testcount; ++i) { + data = read_part_sector(state, testsect[i], §); + if (data == NULL) + continue; + memcpy(label, data, sizeof(*label)); + memcpy(temp, data, 4); + temp[4] = 0; + EBCASC(temp, 4); + put_dev_sector(sect); + if (!strcmp(temp, "VOL1") || + !strcmp(temp, "LNX1") || + !strcmp(temp, "CMS1")) { + if (!strcmp(temp, "VOL1")) { + strncpy(type, label->vol.vollbl, 4); + strncpy(name, label->vol.volid, 6); + } else { + strncpy(type, label->lnx.vollbl, 4); + strncpy(name, label->lnx.volid, 6); + } + EBCASC(type, 4); + EBCASC(name, 6); + *labelsect = testsect[i]; + found = 1; + break; + } + } + if (!found) + memset(label, 0, sizeof(*label)); + + return found; +} + +static int find_vol1_partitions(struct parsed_partitions *state, + struct hd_geometry *geo, + int blocksize, + char name[], + union label_t *label) +{ + sector_t blk; + int counter; + char tmp[64]; + Sector sect; + unsigned char *data; + loff_t offset, size; + struct vtoc_format1_label f1; + int secperblk; + + snprintf(tmp, sizeof(tmp), "VOL1/%8s:", name); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + /* + * get start of VTOC from the disk label and then search for format1 + * and format8 labels + */ + secperblk = blocksize >> 9; + blk = cchhb2blk(&label->vol.vtoc, geo) + 1; + counter = 0; + data = read_part_sector(state, blk * secperblk, §); + while (data != NULL) { + memcpy(&f1, data, sizeof(struct vtoc_format1_label)); + put_dev_sector(sect); + /* skip FMT4 / FMT5 / FMT7 labels */ + if (f1.DS1FMTID == _ascebc['4'] + || f1.DS1FMTID == _ascebc['5'] + || f1.DS1FMTID == _ascebc['7'] + || f1.DS1FMTID == _ascebc['9']) { + blk++; + data = read_part_sector(state, blk * secperblk, §); + continue; + } + /* only FMT1 and 8 labels valid at this point */ + if (f1.DS1FMTID != _ascebc['1'] && + f1.DS1FMTID != _ascebc['8']) + break; + /* OK, we got valid partition data */ + offset = cchh2blk(&f1.DS1EXT1.llimit, geo); + size = cchh2blk(&f1.DS1EXT1.ulimit, geo) - + offset + geo->sectors; + offset *= secperblk; + size *= secperblk; + if (counter >= state->limit) + break; + put_partition(state, counter + 1, offset, size); + counter++; + blk++; + data = read_part_sector(state, blk * secperblk, §); + } + strlcat(state->pp_buf, "\n", PAGE_SIZE); + + if (!data) + return -1; + + return 1; +} + +static int find_lnx1_partitions(struct parsed_partitions *state, + struct hd_geometry *geo, + int blocksize, + char name[], + union label_t *label, + sector_t labelsect, + loff_t i_size, + dasd_information2_t *info) +{ + loff_t offset, geo_size, size; + char tmp[64]; + int secperblk; + + snprintf(tmp, sizeof(tmp), "LNX1/%8s:", name); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + secperblk = blocksize >> 9; + if (label->lnx.ldl_version == 0xf2) { + size = label->lnx.formatted_blocks * secperblk; + } else { + /* + * Formated w/o large volume support. If the sanity check + * 'size based on geo == size based on i_size' is true, then + * we can safely assume that we know the formatted size of + * the disk, otherwise we need additional information + * that we can only get from a real DASD device. + */ + geo_size = geo->cylinders * geo->heads + * geo->sectors * secperblk; + size = i_size >> 9; + if (size != geo_size) { + if (!info) { + strlcat(state->pp_buf, "\n", PAGE_SIZE); + return 1; + } + if (!strcmp(info->type, "ECKD")) + if (geo_size < size) + size = geo_size; + /* else keep size based on i_size */ + } + } + /* first and only partition starts in the first block after the label */ + offset = labelsect + secperblk; + put_partition(state, 1, offset, size - offset); + strlcat(state->pp_buf, "\n", PAGE_SIZE); + return 1; +} + +static int find_cms1_partitions(struct parsed_partitions *state, + struct hd_geometry *geo, + int blocksize, + char name[], + union label_t *label, + sector_t labelsect) +{ + loff_t offset, size; + char tmp[64]; + int secperblk; + + /* + * VM style CMS1 labeled disk + */ + blocksize = label->cms.block_size; + secperblk = blocksize >> 9; + if (label->cms.disk_offset != 0) { + snprintf(tmp, sizeof(tmp), "CMS1/%8s(MDSK):", name); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + /* disk is reserved minidisk */ + offset = label->cms.disk_offset * secperblk; + size = (label->cms.block_count - 1) * secperblk; + } else { + snprintf(tmp, sizeof(tmp), "CMS1/%8s:", name); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + /* + * Special case for FBA devices: + * If an FBA device is CMS formatted with blocksize > 512 byte + * and the DIAG discipline is used, then the CMS label is found + * in sector 1 instead of block 1. However, the partition is + * still supposed to start in block 2. + */ + if (labelsect == 1) + offset = 2 * secperblk; + else + offset = labelsect + secperblk; + size = label->cms.block_count * secperblk; + } + + put_partition(state, 1, offset, size-offset); + strlcat(state->pp_buf, "\n", PAGE_SIZE); + return 1; +} + + /* + * This is the main function, called by check.c */ int ibm_partition(struct parsed_partitions *state) { struct block_device *bdev = state->bdev; int blocksize, res; - loff_t i_size, offset, size, fmt_size; + loff_t i_size, offset, size; dasd_information2_t *info; struct hd_geometry *geo; char type[5] = {0,}; char name[7] = {0,}; - union label_t { - struct vtoc_volume_label_cdl vol; - struct vtoc_volume_label_ldl lnx; - struct vtoc_cms_label cms; - } *label; - unsigned char *data; - Sector sect; sector_t labelsect; - char tmp[64]; + union label_t *label; res = 0; blocksize = bdev_logical_block_size(bdev); @@ -84,7 +305,6 @@ int ibm_partition(struct parsed_partitions *state) i_size = i_size_read(bdev->bd_inode); if (i_size == 0) goto out_exit; - info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL); if (info == NULL) goto out_exit; @@ -94,176 +314,45 @@ int ibm_partition(struct parsed_partitions *state) label = kmalloc(sizeof(union label_t), GFP_KERNEL); if (label == NULL) goto out_nolab; - - if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0 || - ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0) + if (ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0) goto out_freeall; - - /* - * Special case for FBA disks: label sector does not depend on - * blocksize. - */ - if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) || - (info->cu_type == 0x3880 && info->dev_type == 0x3370)) - labelsect = info->label_block; - else - labelsect = info->label_block * (blocksize >> 9); - - /* - * Get volume label, extract name and type. - */ - data = read_part_sector(state, labelsect, §); - if (data == NULL) - goto out_readerr; - - memcpy(label, data, sizeof(union label_t)); - put_dev_sector(sect); - - if ((!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) { - strncpy(type, label->vol.vollbl, 4); - strncpy(name, label->vol.volid, 6); - } else { - strncpy(type, label->lnx.vollbl, 4); - strncpy(name, label->lnx.volid, 6); + if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0) { + kfree(info); + info = NULL; } - EBCASC(type, 4); - EBCASC(name, 6); - - res = 1; - /* - * Three different formats: LDL, CDL and unformated disk - * - * identified by info->format - * - * unformated disks we do not have to care about - */ - if (info->format == DASD_FORMAT_LDL) { - if (strncmp(type, "CMS1", 4) == 0) { - /* - * VM style CMS1 labeled disk - */ - blocksize = label->cms.block_size; - if (label->cms.disk_offset != 0) { - snprintf(tmp, sizeof(tmp), "CMS1/%8s(MDSK):", name); - strlcat(state->pp_buf, tmp, PAGE_SIZE); - /* disk is reserved minidisk */ - offset = label->cms.disk_offset; - size = (label->cms.block_count - 1) - * (blocksize >> 9); - } else { - snprintf(tmp, sizeof(tmp), "CMS1/%8s:", name); - strlcat(state->pp_buf, tmp, PAGE_SIZE); - offset = (info->label_block + 1); - size = label->cms.block_count - * (blocksize >> 9); - } - put_partition(state, 1, offset*(blocksize >> 9), - size-offset*(blocksize >> 9)); - } else { - if (strncmp(type, "LNX1", 4) == 0) { - snprintf(tmp, sizeof(tmp), "LNX1/%8s:", name); - strlcat(state->pp_buf, tmp, PAGE_SIZE); - if (label->lnx.ldl_version == 0xf2) { - fmt_size = label->lnx.formatted_blocks - * (blocksize >> 9); - } else if (!strcmp(info->type, "ECKD")) { - /* formated w/o large volume support */ - fmt_size = geo->cylinders * geo->heads - * geo->sectors * (blocksize >> 9); - } else { - /* old label and no usable disk geometry - * (e.g. DIAG) */ - fmt_size = i_size >> 9; - } - size = i_size >> 9; - if (fmt_size < size) - size = fmt_size; - offset = (info->label_block + 1); - } else { - /* unlabeled disk */ - strlcat(state->pp_buf, "(nonl)", PAGE_SIZE); - size = i_size >> 9; - offset = (info->label_block + 1); - } - put_partition(state, 1, offset*(blocksize >> 9), - size-offset*(blocksize >> 9)); + if (find_label(state, info, geo, blocksize, &labelsect, name, type, + label)) { + if (!strncmp(type, "VOL1", 4)) { + res = find_vol1_partitions(state, geo, blocksize, name, + label); + } else if (!strncmp(type, "LNX1", 4)) { + res = find_lnx1_partitions(state, geo, blocksize, name, + label, labelsect, i_size, + info); + } else if (!strncmp(type, "CMS1", 4)) { + res = find_cms1_partitions(state, geo, blocksize, name, + label, labelsect); } - } else if (info->format == DASD_FORMAT_CDL) { - /* - * New style CDL formatted disk - */ - sector_t blk; - int counter; - + } else if (info) { /* - * check if VOL1 label is available - * if not, something is wrong, skipping partition detection + * ugly but needed for backward compatibility: + * If the block device is a DASD (i.e. BIODASDINFO2 works), + * then we claim it in any case, even though it has no valid + * label. If it has the LDL format, then we simply define a + * partition as if it had an LNX1 label. */ - if (strncmp(type, "VOL1", 4) == 0) { - snprintf(tmp, sizeof(tmp), "VOL1/%8s:", name); - strlcat(state->pp_buf, tmp, PAGE_SIZE); - /* - * get block number and read then go through format1 - * labels - */ - blk = cchhb2blk(&label->vol.vtoc, geo) + 1; - counter = 0; - data = read_part_sector(state, blk * (blocksize/512), - §); - while (data != NULL) { - struct vtoc_format1_label f1; - - memcpy(&f1, data, - sizeof(struct vtoc_format1_label)); - put_dev_sector(sect); - - /* skip FMT4 / FMT5 / FMT7 labels */ - if (f1.DS1FMTID == _ascebc['4'] - || f1.DS1FMTID == _ascebc['5'] - || f1.DS1FMTID == _ascebc['7'] - || f1.DS1FMTID == _ascebc['9']) { - blk++; - data = read_part_sector(state, - blk * (blocksize/512), §); - continue; - } - - /* only FMT1 and 8 labels valid at this point */ - if (f1.DS1FMTID != _ascebc['1'] && - f1.DS1FMTID != _ascebc['8']) - break; - - /* OK, we got valid partition data */ - offset = cchh2blk(&f1.DS1EXT1.llimit, geo); - size = cchh2blk(&f1.DS1EXT1.ulimit, geo) - - offset + geo->sectors; - if (counter >= state->limit) - break; - put_partition(state, counter + 1, - offset * (blocksize >> 9), - size * (blocksize >> 9)); - counter++; - blk++; - data = read_part_sector(state, - blk * (blocksize/512), §); - } - - if (!data) - /* Are we not supposed to report this ? */ - goto out_readerr; - } else - printk(KERN_INFO "Expected Label VOL1 not " - "found, treating as CDL formated Disk"); - - } - - strlcat(state->pp_buf, "\n", PAGE_SIZE); - goto out_freeall; - + res = 1; + if (info->format == DASD_FORMAT_LDL) { + strlcat(state->pp_buf, "(nonl)", PAGE_SIZE); + size = i_size >> 9; + offset = (info->label_block + 1) * (blocksize >> 9); + put_partition(state, 1, offset, size-offset); + strlcat(state->pp_buf, "\n", PAGE_SIZE); + } + } else + res = 0; -out_readerr: - res = -1; out_freeall: kfree(label); out_nolab: |