summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Makefile3
-rw-r--r--block/bfq-cgroup.c87
-rw-r--r--block/bfq-iosched.c18
-rw-r--r--block/bfq-iosched.h1
-rw-r--r--block/bio.c580
-rw-r--r--block/blk-cgroup.c2
-rw-r--r--block/blk-core.c82
-rw-r--r--block/blk-flush.c16
-rw-r--r--block/blk-ioc.c7
-rw-r--r--block/blk-iocost.c5
-rw-r--r--block/blk-map.c508
-rw-r--r--block/blk-mq-sched.c22
-rw-r--r--block/blk-mq.c59
-rw-r--r--block/blk-settings.c39
-rw-r--r--block/blk-zoned.c34
-rw-r--r--block/blk.h138
-rw-r--r--block/genhd.c255
-rw-r--r--block/ioctl.c1
-rw-r--r--block/opal_proto.h1
-rw-r--r--block/partitions/Makefile3
-rw-r--r--block/partitions/acorn.c1
-rw-r--r--block/partitions/acorn.h15
-rw-r--r--block/partitions/aix.c1
-rw-r--r--block/partitions/aix.h2
-rw-r--r--block/partitions/amiga.c11
-rw-r--r--block/partitions/amiga.h7
-rw-r--r--block/partitions/atari.h1
-rw-r--r--block/partitions/check.c198
-rw-r--r--block/partitions/check.h41
-rw-r--r--block/partitions/cmdline.c1
-rw-r--r--block/partitions/cmdline.h3
-rw-r--r--block/partitions/core.c (renamed from block/partition-generic.c)319
-rw-r--r--block/partitions/efi.h3
-rw-r--r--block/partitions/ibm.c1
-rw-r--r--block/partitions/ibm.h2
-rw-r--r--block/partitions/karma.c3
-rw-r--r--block/partitions/karma.h9
-rw-r--r--block/partitions/ldm.c6
-rw-r--r--block/partitions/ldm.h2
-rw-r--r--block/partitions/mac.h1
-rw-r--r--block/partitions/msdos.c172
-rw-r--r--block/partitions/msdos.h9
-rw-r--r--block/partitions/osf.c2
-rw-r--r--block/partitions/osf.h8
-rw-r--r--block/partitions/sgi.c7
-rw-r--r--block/partitions/sgi.h9
-rw-r--r--block/partitions/sun.c9
-rw-r--r--block/partitions/sun.h9
-rw-r--r--block/partitions/sysv68.c1
-rw-r--r--block/partitions/sysv68.h2
-rw-r--r--block/partitions/ultrix.c1
-rw-r--r--block/partitions/ultrix.h6
-rw-r--r--block/sed-opal.c2
53 files changed, 1532 insertions, 1193 deletions
diff --git a/block/Makefile b/block/Makefile
index 1a43750f4b01..206b96e9387f 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,8 +8,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
- genhd.o partition-generic.o ioprio.o \
- badblocks.o partitions/ blk-rq-qos.o
+ genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index f0ff6654af28..68882b9b8f11 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -642,6 +642,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
{
struct bfq_entity *entity = &bfqq->entity;
+ /*
+ * Get extra reference to prevent bfqq from being freed in
+ * next possible expire or deactivate.
+ */
+ bfqq->ref++;
+
/* If bfqq is empty, then bfq_bfqq_expire also invokes
* bfq_del_bfqq_busy, thereby removing bfqq and its entity
* from data structures related to current group. Otherwise we
@@ -652,12 +658,6 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
false, BFQQE_PREEMPTED);
- /*
- * get extra reference to prevent bfqq from being freed in
- * next possible deactivate
- */
- bfqq->ref++;
-
if (bfq_bfqq_busy(bfqq))
bfq_deactivate_bfqq(bfqd, bfqq, false, false);
else if (entity->on_st_or_in_serv)
@@ -677,7 +677,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
bfq_schedule_dispatch(bfqd);
- /* release extra ref taken above */
+ /* release extra ref taken above, bfqq may happen to be freed now */
bfq_put_queue(bfqq);
}
@@ -714,10 +714,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
if (entity->sched_data != &bfqg->sched_data) {
bic_set_bfqq(bic, NULL, 0);
- bfq_log_bfqq(bfqd, async_bfqq,
- "bic_change_group: %p %d",
- async_bfqq, async_bfqq->ref);
- bfq_put_queue(async_bfqq);
+ bfq_release_process_ref(bfqd, async_bfqq);
}
}
@@ -818,39 +815,53 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st)
/**
* bfq_reparent_leaf_entity - move leaf entity to the root_group.
* @bfqd: the device data structure with the root group.
- * @entity: the entity to move.
+ * @entity: the entity to move, if entity is a leaf; or the parent entity
+ * of an active leaf entity to move, if entity is not a leaf.
*/
static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
- struct bfq_entity *entity)
+ struct bfq_entity *entity,
+ int ioprio_class)
{
- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+ struct bfq_queue *bfqq;
+ struct bfq_entity *child_entity = entity;
+ while (child_entity->my_sched_data) { /* leaf not reached yet */
+ struct bfq_sched_data *child_sd = child_entity->my_sched_data;
+ struct bfq_service_tree *child_st = child_sd->service_tree +
+ ioprio_class;
+ struct rb_root *child_active = &child_st->active;
+
+ child_entity = bfq_entity_of(rb_first(child_active));
+
+ if (!child_entity)
+ child_entity = child_sd->in_service_entity;
+ }
+
+ bfqq = bfq_entity_to_bfqq(child_entity);
bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
}
/**
- * bfq_reparent_active_entities - move to the root group all active
- * entities.
+ * bfq_reparent_active_queues - move to the root group all active queues.
* @bfqd: the device data structure with the root group.
* @bfqg: the group to move from.
- * @st: the service tree with the entities.
+ * @st: the service tree to start the search from.
*/
-static void bfq_reparent_active_entities(struct bfq_data *bfqd,
- struct bfq_group *bfqg,
- struct bfq_service_tree *st)
+static void bfq_reparent_active_queues(struct bfq_data *bfqd,
+ struct bfq_group *bfqg,
+ struct bfq_service_tree *st,
+ int ioprio_class)
{
struct rb_root *active = &st->active;
- struct bfq_entity *entity = NULL;
-
- if (!RB_EMPTY_ROOT(&st->active))
- entity = bfq_entity_of(rb_first(active));
+ struct bfq_entity *entity;
- for (; entity ; entity = bfq_entity_of(rb_first(active)))
- bfq_reparent_leaf_entity(bfqd, entity);
+ while ((entity = bfq_entity_of(rb_first(active))))
+ bfq_reparent_leaf_entity(bfqd, entity, ioprio_class);
if (bfqg->sched_data.in_service_entity)
bfq_reparent_leaf_entity(bfqd,
- bfqg->sched_data.in_service_entity);
+ bfqg->sched_data.in_service_entity,
+ ioprio_class);
}
/**
@@ -883,13 +894,6 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
st = bfqg->sched_data.service_tree + i;
/*
- * The idle tree may still contain bfq_queues belonging
- * to exited task because they never migrated to a different
- * cgroup from the one being destroyed now.
- */
- bfq_flush_idle_tree(st);
-
- /*
* It may happen that some queues are still active
* (busy) upon group destruction (if the corresponding
* processes have been forced to terminate). We move
@@ -901,7 +905,20 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
* There is no need to put the sync queues, as the
* scheduler has taken no reference.
*/
- bfq_reparent_active_entities(bfqd, bfqg, st);
+ bfq_reparent_active_queues(bfqd, bfqg, st, i);
+
+ /*
+ * The idle tree may still contain bfq_queues
+ * belonging to exited task because they never
+ * migrated to a different cgroup from the one being
+ * destroyed now. In addition, even
+ * bfq_reparent_active_queues() may happen to add some
+ * entities to the idle tree. It happens if, in some
+ * of the calls to bfq_bfqq_move() performed by
+ * bfq_reparent_active_queues(), the queue to move is
+ * empty and gets expired.
+ */
+ bfq_flush_idle_tree(st);
}
__bfq_deactivate_entity(entity, false);
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 8c436abfaf14..78ba57efd16b 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2716,8 +2716,6 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
}
}
-
-static
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
/*
@@ -6215,20 +6213,28 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
return bfqq;
}
-static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
+static void
+bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
- struct bfq_data *bfqd = bfqq->bfqd;
enum bfqq_expiration reason;
unsigned long flags;
spin_lock_irqsave(&bfqd->lock, flags);
- bfq_clear_bfqq_wait_request(bfqq);
+ /*
+ * Considering that bfqq may be in race, we should firstly check
+ * whether bfqq is in service before doing something on it. If
+ * the bfqq in race is not in service, it has already been expired
+ * through __bfq_bfqq_expire func and its wait_request flags has
+ * been cleared in __bfq_bfqd_reset_in_service func.
+ */
if (bfqq != bfqd->in_service_queue) {
spin_unlock_irqrestore(&bfqd->lock, flags);
return;
}
+ bfq_clear_bfqq_wait_request(bfqq);
+
if (bfq_bfqq_budget_timeout(bfqq))
/*
* Also here the queue can be safely expired
@@ -6273,7 +6279,7 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer)
* early.
*/
if (bfqq)
- bfq_idle_slice_timer_body(bfqq);
+ bfq_idle_slice_timer_body(bfqd, bfqq);
return HRTIMER_NORESTART;
}
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index d1233af9c684..cd224aaf9f52 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -955,6 +955,7 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bool compensate, enum bfqq_expiration reason);
void bfq_put_queue(struct bfq_queue *bfqq);
void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
+void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_schedule_dispatch(struct bfq_data *bfqd);
void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
diff --git a/block/bio.c b/block/bio.c
index 94d697217887..21cbaa6a1c20 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -17,6 +17,7 @@
#include <linux/cgroup.h>
#include <linux/blk-cgroup.h>
#include <linux/highmem.h>
+#include <linux/sched/sysctl.h>
#include <trace/events/block.h>
#include "blk.h"
@@ -588,6 +589,49 @@ void bio_truncate(struct bio *bio, unsigned new_size)
}
/**
+ * guard_bio_eod - truncate a BIO to fit the block device
+ * @bio: bio to truncate
+ *
+ * This allows us to do IO even on the odd last sectors of a device, even if the
+ * block size is some multiple of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device, and clear the end of
+ * the buffer head manually. Truly out-of-range accesses will turn into actual
+ * I/O errors, this only handles the "we need to be able to do I/O at the final
+ * sector" case.
+ */
+void guard_bio_eod(struct bio *bio)
+{
+ sector_t maxsector;
+ struct hd_struct *part;
+
+ rcu_read_lock();
+ part = __disk_get_part(bio->bi_disk, bio->bi_partno);
+ if (part)
+ maxsector = part_nr_sects_read(part);
+ else
+ maxsector = get_capacity(bio->bi_disk);
+ rcu_read_unlock();
+
+ if (!maxsector)
+ return;
+
+ /*
+ * If the *whole* IO is past the end of the device,
+ * let it through, and the IO layer will turn it into
+ * an EIO.
+ */
+ if (unlikely(bio->bi_iter.bi_sector >= maxsector))
+ return;
+
+ maxsector -= bio->bi_iter.bi_sector;
+ if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
+ return;
+
+ bio_truncate(bio, maxsector << 9);
+}
+
+/**
* bio_put - release a reference to a bio
* @bio: bio to release reference to
*
@@ -679,6 +723,12 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
}
EXPORT_SYMBOL(bio_clone_fast);
+const char *bio_devname(struct bio *bio, char *buf)
+{
+ return disk_name(bio->bi_disk, bio->bi_partno, buf);
+}
+EXPORT_SYMBOL(bio_devname);
+
static inline bool page_is_mergeable(const struct bio_vec *bv,
struct page *page, unsigned int len, unsigned int off,
bool *same_page)
@@ -730,7 +780,7 @@ static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
*
* This should only be used by passthrough bios.
*/
-static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
bool *same_page)
{
@@ -1019,12 +1069,21 @@ static void submit_bio_wait_endio(struct bio *bio)
int submit_bio_wait(struct bio *bio)
{
DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
+ unsigned long hang_check;
bio->bi_private = &done;
bio->bi_end_io = submit_bio_wait_endio;
bio->bi_opf |= REQ_SYNC;
submit_bio(bio);
- wait_for_completion_io(&done);
+
+ /* Prevent hang_check timer from firing at us during very long I/O */
+ hang_check = sysctl_hung_task_timeout_secs;
+ if (hang_check)
+ while (!wait_for_completion_io_timeout(&done,
+ hang_check * (HZ/2)))
+ ;
+ else
+ wait_for_completion_io(&done);
return blk_status_to_errno(bio->bi_status);
}
@@ -1135,90 +1194,6 @@ void bio_list_copy_data(struct bio *dst, struct bio *src)
}
EXPORT_SYMBOL(bio_list_copy_data);
-struct bio_map_data {
- int is_our_pages;
- struct iov_iter iter;
- struct iovec iov[];
-};
-
-static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
- gfp_t gfp_mask)
-{
- struct bio_map_data *bmd;
- if (data->nr_segs > UIO_MAXIOV)
- return NULL;
-
- bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
- if (!bmd)
- return NULL;
- memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
- bmd->iter = *data;
- bmd->iter.iov = bmd->iov;
- return bmd;
-}
-
-/**
- * bio_copy_from_iter - copy all pages from iov_iter to bio
- * @bio: The &struct bio which describes the I/O as destination
- * @iter: iov_iter as source
- *
- * Copy all pages from iov_iter to bio.
- * Returns 0 on success, or error on failure.
- */
-static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
-{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- ssize_t ret;
-
- ret = copy_page_from_iter(bvec->bv_page,
- bvec->bv_offset,
- bvec->bv_len,
- iter);
-
- if (!iov_iter_count(iter))
- break;
-
- if (ret < bvec->bv_len)
- return -EFAULT;
- }
-
- return 0;
-}
-
-/**
- * bio_copy_to_iter - copy all pages from bio to iov_iter
- * @bio: The &struct bio which describes the I/O as source
- * @iter: iov_iter as destination
- *
- * Copy all pages from bio to iov_iter.
- * Returns 0 on success, or error on failure.
- */
-static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
-{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- ssize_t ret;
-
- ret = copy_page_to_iter(bvec->bv_page,
- bvec->bv_offset,
- bvec->bv_len,
- &iter);
-
- if (!iov_iter_count(&iter))
- break;
-
- if (ret < bvec->bv_len)
- return -EFAULT;
- }
-
- return 0;
-}
-
void bio_free_pages(struct bio *bio)
{
struct bio_vec *bvec;
@@ -1229,430 +1204,6 @@ void bio_free_pages(struct bio *bio)
}
EXPORT_SYMBOL(bio_free_pages);
-/**
- * bio_uncopy_user - finish previously mapped bio
- * @bio: bio being terminated
- *
- * Free pages allocated from bio_copy_user_iov() and write back data
- * to user space in case of a read.
- */
-int bio_uncopy_user(struct bio *bio)
-{
- struct bio_map_data *bmd = bio->bi_private;
- int ret = 0;
-
- if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
- /*
- * if we're in a workqueue, the request is orphaned, so
- * don't copy into a random user address space, just free
- * and return -EINTR so user space doesn't expect any data.
- */
- if (!current->mm)
- ret = -EINTR;
- else if (bio_data_dir(bio) == READ)
- ret = bio_copy_to_iter(bio, bmd->iter);
- if (bmd->is_our_pages)
- bio_free_pages(bio);
- }
- kfree(bmd);
- bio_put(bio);
- return ret;
-}
-
-/**
- * bio_copy_user_iov - copy user data to bio
- * @q: destination block queue
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @iter: iovec iterator
- * @gfp_mask: memory allocation flags
- *
- * Prepares and returns a bio for indirect user io, bouncing data
- * to/from kernel pages as necessary. Must be paired with
- * call bio_uncopy_user() on io completion.
- */
-struct bio *bio_copy_user_iov(struct request_queue *q,
- struct rq_map_data *map_data,
- struct iov_iter *iter,
- gfp_t gfp_mask)
-{
- struct bio_map_data *bmd;
- struct page *page;
- struct bio *bio;
- int i = 0, ret;
- int nr_pages;
- unsigned int len = iter->count;
- unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
-
- bmd = bio_alloc_map_data(iter, gfp_mask);
- if (!bmd)
- return ERR_PTR(-ENOMEM);
-
- /*
- * We need to do a deep copy of the iov_iter including the iovecs.
- * The caller provided iov might point to an on-stack or otherwise
- * shortlived one.
- */
- bmd->is_our_pages = map_data ? 0 : 1;
-
- nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
- if (nr_pages > BIO_MAX_PAGES)
- nr_pages = BIO_MAX_PAGES;
-
- ret = -ENOMEM;
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- goto out_bmd;
-
- ret = 0;
-
- if (map_data) {
- nr_pages = 1 << map_data->page_order;
- i = map_data->offset / PAGE_SIZE;
- }
- while (len) {
- unsigned int bytes = PAGE_SIZE;
-
- bytes -= offset;
-
- if (bytes > len)
- bytes = len;
-
- if (map_data) {
- if (i == map_data->nr_entries * nr_pages) {
- ret = -ENOMEM;
- break;
- }
-
- page = map_data->pages[i / nr_pages];
- page += (i % nr_pages);
-
- i++;
- } else {
- page = alloc_page(q->bounce_gfp | gfp_mask);
- if (!page) {
- ret = -ENOMEM;
- break;
- }
- }
-
- if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
- if (!map_data)
- __free_page(page);
- break;
- }
-
- len -= bytes;
- offset = 0;
- }
-
- if (ret)
- goto cleanup;
-
- if (map_data)
- map_data->offset += bio->bi_iter.bi_size;
-
- /*
- * success
- */
- if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) ||
- (map_data && map_data->from_user)) {
- ret = bio_copy_from_iter(bio, iter);
- if (ret)
- goto cleanup;
- } else {
- if (bmd->is_our_pages)
- zero_fill_bio(bio);
- iov_iter_advance(iter, bio->bi_iter.bi_size);
- }
-
- bio->bi_private = bmd;
- if (map_data && map_data->null_mapped)
- bio_set_flag(bio, BIO_NULL_MAPPED);
- return bio;
-cleanup:
- if (!map_data)
- bio_free_pages(bio);
- bio_put(bio);
-out_bmd:
- kfree(bmd);
- return ERR_PTR(ret);
-}
-
-/**
- * bio_map_user_iov - map user iovec into bio
- * @q: the struct request_queue for the bio
- * @iter: iovec iterator
- * @gfp_mask: memory allocation flags
- *
- * Map the user space address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_user_iov(struct request_queue *q,
- struct iov_iter *iter,
- gfp_t gfp_mask)
-{
- int j;
- struct bio *bio;
- int ret;
-
- if (!iov_iter_count(iter))
- return ERR_PTR(-EINVAL);
-
- bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- while (iov_iter_count(iter)) {
- struct page **pages;
- ssize_t bytes;
- size_t offs, added = 0;
- int npages;
-
- bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
- if (unlikely(bytes <= 0)) {
- ret = bytes ? bytes : -EFAULT;
- goto out_unmap;
- }
-
- npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
-
- if (unlikely(offs & queue_dma_alignment(q))) {
- ret = -EINVAL;
- j = 0;
- } else {
- for (j = 0; j < npages; j++) {
- struct page *page = pages[j];
- unsigned int n = PAGE_SIZE - offs;
- bool same_page = false;
-
- if (n > bytes)
- n = bytes;
-
- if (!__bio_add_pc_page(q, bio, page, n, offs,
- &same_page)) {
- if (same_page)
- put_page(page);
- break;
- }
-
- added += n;
- bytes -= n;
- offs = 0;
- }
- iov_iter_advance(iter, added);
- }
- /*
- * release the pages we didn't map into the bio, if any
- */
- while (j < npages)
- put_page(pages[j++]);
- kvfree(pages);
- /* couldn't stuff something into bio? */
- if (bytes)
- break;
- }
-
- bio_set_flag(bio, BIO_USER_MAPPED);
-
- /*
- * subtle -- if bio_map_user_iov() ended up bouncing a bio,
- * it would normally disappear when its bi_end_io is run.
- * however, we need it for the unmap, so grab an extra
- * reference to it
- */
- bio_get(bio);
- return bio;
-
- out_unmap:
- bio_release_pages(bio, false);
- bio_put(bio);
- return ERR_PTR(ret);
-}
-
-/**
- * bio_unmap_user - unmap a bio
- * @bio: the bio being unmapped
- *
- * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
- * process context.
- *
- * bio_unmap_user() may sleep.
- */
-void bio_unmap_user(struct bio *bio)
-{
- bio_release_pages(bio, bio_data_dir(bio) == READ);
- bio_put(bio);
- bio_put(bio);
-}
-
-static void bio_invalidate_vmalloc_pages(struct bio *bio)
-{
-#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
- if (bio->bi_private && !op_is_write(bio_op(bio))) {
- unsigned long i, len = 0;
-
- for (i = 0; i < bio->bi_vcnt; i++)
- len += bio->bi_io_vec[i].bv_len;
- invalidate_kernel_vmap_range(bio->bi_private, len);
- }
-#endif
-}
-
-static void bio_map_kern_endio(struct bio *bio)
-{
- bio_invalidate_vmalloc_pages(bio);
- bio_put(bio);
-}
-
-/**
- * bio_map_kern - map kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to map
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio allocation
- *
- * Map the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
- gfp_t gfp_mask)
-{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- const int nr_pages = end - start;
- bool is_vmalloc = is_vmalloc_addr(data);
- struct page *page;
- int offset, i;
- struct bio *bio;
-
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- if (is_vmalloc) {
- flush_kernel_vmap_range(data, len);
- bio->bi_private = data;
- }
-
- offset = offset_in_page(kaddr);
- for (i = 0; i < nr_pages; i++) {
- unsigned int bytes = PAGE_SIZE - offset;
-
- if (len <= 0)
- break;
-
- if (bytes > len)
- bytes = len;
-
- if (!is_vmalloc)
- page = virt_to_page(data);
- else
- page = vmalloc_to_page(data);
- if (bio_add_pc_page(q, bio, page, bytes,
- offset) < bytes) {
- /* we don't support partial mappings */
- bio_put(bio);
- return ERR_PTR(-EINVAL);
- }
-
- data += bytes;
- len -= bytes;
- offset = 0;
- }
-
- bio->bi_end_io = bio_map_kern_endio;
- return bio;
-}
-
-static void bio_copy_kern_endio(struct bio *bio)
-{
- bio_free_pages(bio);
- bio_put(bio);
-}
-
-static void bio_copy_kern_endio_read(struct bio *bio)
-{
- char *p = bio->bi_private;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
- p += bvec->bv_len;
- }
-
- bio_copy_kern_endio(bio);
-}
-
-/**
- * bio_copy_kern - copy kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to copy
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio and page allocation
- * @reading: data direction is READ
- *
- * copy the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
- gfp_t gfp_mask, int reading)
-{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- struct bio *bio;
- void *p = data;
- int nr_pages = 0;
-
- /*
- * Overflow, abort
- */
- if (end < start)
- return ERR_PTR(-EINVAL);
-
- nr_pages = end - start;
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- while (len) {
- struct page *page;
- unsigned int bytes = PAGE_SIZE;
-
- if (bytes > len)
- bytes = len;
-
- page = alloc_page(q->bounce_gfp | gfp_mask);
- if (!page)
- goto cleanup;
-
- if (!reading)
- memcpy(page_address(page), p, bytes);
-
- if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
- break;
-
- len -= bytes;
- p += bytes;
- }
-
- if (reading) {
- bio->bi_end_io = bio_copy_kern_endio_read;
- bio->bi_private = data;
- } else {
- bio->bi_end_io = bio_copy_kern_endio;
- }
-
- return bio;
-
-cleanup:
- bio_free_pages(bio);
- bio_put(bio);
- return ERR_PTR(-ENOMEM);
-}
-
/*
* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
* for performing direct-IO in BIOs.
@@ -1752,14 +1303,14 @@ defer:
schedule_work(&bio_dirty_work);
}
-void update_io_ticks(struct hd_struct *part, unsigned long now)
+void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
{
unsigned long stamp;
again:
stamp = READ_ONCE(part->stamp);
if (unlikely(stamp != now)) {
if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
- __part_stat_add(part, io_ticks, 1);
+ __part_stat_add(part, io_ticks, end ? now - stamp : 1);
}
}
if (part->partno) {
@@ -1775,7 +1326,7 @@ void generic_start_io_acct(struct request_queue *q, int op,
part_stat_lock();
- update_io_ticks(part, jiffies);
+ update_io_ticks(part, jiffies, false);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, sectors[sgrp], sectors);
part_inc_in_flight(q, part, op_is_write(op));
@@ -1793,9 +1344,8 @@ void generic_end_io_acct(struct request_queue *q, int req_op,
part_stat_lock();
- update_io_ticks(part, now);
+ update_io_ticks(part, now, true);
part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
- part_stat_add(part, time_in_queue, duration);
part_dec_in_flight(q, part, op_is_write(req_op));
part_stat_unlock();
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index a229b94d5390..c15a26096038 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1010,7 +1010,7 @@ unlock:
* blkcg_init_queue - initialize blkcg part of request queue
* @q: request_queue to initialize
*
- * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
+ * Called from __blk_alloc_queue(). Responsible for initializing blkcg
* part of new request_queue @q.
*
* RETURNS:
diff --git a/block/blk-core.c b/block/blk-core.c
index 60dc9552ef8d..7e4a1da0715e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -346,7 +346,6 @@ void blk_cleanup_queue(struct request_queue *q)
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
- blk_queue_flag_set(QUEUE_FLAG_DYING, q);
/*
* Drain all requests queued before DYING marking. Set DEAD flag to
@@ -389,12 +388,6 @@ void blk_cleanup_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_cleanup_queue);
-struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
-{
- return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
-}
-EXPORT_SYMBOL(blk_alloc_queue);
-
/**
* blk_queue_enter() - try to increase q->q_usage_counter
* @q: request queue pointer
@@ -471,24 +464,19 @@ static void blk_timeout_work(struct work_struct *work)
{
}
-/**
- * blk_alloc_queue_node - allocate a request queue
- * @gfp_mask: memory allocation flags
- * @node_id: NUMA node to allocate memory from
- */
-struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
+struct request_queue *__blk_alloc_queue(int node_id)
{
struct request_queue *q;
int ret;
q = kmem_cache_alloc_node(blk_requestq_cachep,
- gfp_mask | __GFP_ZERO, node_id);
+ GFP_KERNEL | __GFP_ZERO, node_id);
if (!q)
return NULL;
q->last_merge = NULL;
- q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
+ q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
if (q->id < 0)
goto fail_q;
@@ -496,7 +484,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (ret)
goto fail_id;
- q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
+ q->backing_dev_info = bdi_alloc_node(GFP_KERNEL, node_id);
if (!q->backing_dev_info)
goto fail_split;
@@ -542,6 +530,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (blkcg_init_queue(q))
goto fail_ref;
+ blk_queue_dma_alignment(q, 511);
+ blk_set_default_limits(&q->limits);
+
return q;
fail_ref:
@@ -558,7 +549,22 @@ fail_q:
kmem_cache_free(blk_requestq_cachep, q);
return NULL;
}
-EXPORT_SYMBOL(blk_alloc_queue_node);
+
+struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id)
+{
+ struct request_queue *q;
+
+ if (WARN_ON_ONCE(!make_request))
+ return NULL;
+
+ q = __blk_alloc_queue(node_id);
+ if (!q)
+ return NULL;
+ q->make_request_fn = make_request;
+ q->nr_requests = BLKDEV_MAX_RQ;
+ return q;
+}
+EXPORT_SYMBOL(blk_alloc_queue);
bool blk_get_queue(struct request_queue *q)
{
@@ -1121,10 +1127,9 @@ blk_qc_t direct_make_request(struct bio *bio)
if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
if (nowait && !blk_queue_dying(q))
- bio->bi_status = BLK_STS_AGAIN;
+ bio_wouldblock_error(bio);
else
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
+ bio_io_error(bio);
return BLK_QC_T_NONE;
}
@@ -1203,7 +1208,7 @@ EXPORT_SYMBOL(submit_bio);
/**
* blk_cloned_rq_check_limits - Helper function to check a cloned request
- * for new the queue limits
+ * for the new queue limits
* @q: the queue
* @rq: the request being checked
*
@@ -1339,10 +1344,9 @@ void blk_account_io_done(struct request *req, u64 now)
part_stat_lock();
part = req->part;
- update_io_ticks(part, jiffies);
+ update_io_ticks(part, jiffies, true);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
- part_stat_add(part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns));
part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part);
@@ -1381,7 +1385,7 @@ void blk_account_io_start(struct request *rq, bool new_io)
rq->part = part;
}
- update_io_ticks(part, jiffies);
+ update_io_ticks(part, jiffies, false);
part_stat_unlock();
}
@@ -1583,23 +1587,6 @@ void blk_rq_unprep_clone(struct request *rq)
}
EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
-/*
- * Copy attributes of the original request to the clone request.
- * The actual data parts (e.g. ->cmd, ->sense) are not copied.
- */
-static void __blk_rq_prep_clone(struct request *dst, struct request *src)
-{
- dst->__sector = blk_rq_pos(src);
- dst->__data_len = blk_rq_bytes(src);
- if (src->rq_flags & RQF_SPECIAL_PAYLOAD) {
- dst->rq_flags |= RQF_SPECIAL_PAYLOAD;
- dst->special_vec = src->special_vec;
- }
- dst->nr_phys_segments = src->nr_phys_segments;
- dst->ioprio = src->ioprio;
- dst->extra_len = src->extra_len;
-}
-
/**
* blk_rq_prep_clone - Helper function to setup clone request
* @rq: the request to be setup
@@ -1612,8 +1599,6 @@ static void __blk_rq_prep_clone(struct request *dst, struct request *src)
*
* Description:
* Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
- * The actual data parts of @rq_src (e.g. ->cmd, ->sense)
- * are not copied, and copying such parts is the caller's responsibility.
* Also, pages which the original bios are pointing to are not copied
* and the cloned bios just point same pages.
* So cloned bios must be completed before original bios, which means
@@ -1644,7 +1629,16 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
rq->bio = rq->biotail = bio;
}
- __blk_rq_prep_clone(rq, rq_src);
+ /* Copy attributes of the original request to the clone request. */
+ rq->__sector = blk_rq_pos(rq_src);
+ rq->__data_len = blk_rq_bytes(rq_src);
+ if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) {
+ rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+ rq->special_vec = rq_src->special_vec;
+ }
+ rq->nr_phys_segments = rq_src->nr_phys_segments;
+ rq->ioprio = rq_src->ioprio;
+ rq->extra_len = rq_src->extra_len;
return 0;
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 5cc775bdb06a..c7f396e3d5e2 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -160,9 +160,6 @@ static void blk_account_io_flush(struct request *rq)
*
* CONTEXT:
* spin_lock_irq(fq->mq_flush_lock)
- *
- * RETURNS:
- * %true if requests were added to the dispatch queue, %false otherwise.
*/
static void blk_flush_complete_seq(struct request *rq,
struct blk_flush_queue *fq,
@@ -457,15 +454,6 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
if (!q)
return -ENXIO;
- /*
- * some block devices may not have their queue correctly set up here
- * (e.g. loop device without a backing file) and so issuing a flush
- * here will panic. Ensure there is a request function before issuing
- * the flush.
- */
- if (!q->make_request_fn)
- return -ENXIO;
-
bio = bio_alloc(gfp_mask, 0);
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
@@ -485,8 +473,8 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
}
EXPORT_SYMBOL(blkdev_issue_flush);
-struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
- int node, int cmd_size, gfp_t flags)
+struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
+ gfp_t flags)
{
struct blk_flush_queue *fq;
int rq_sz = sizeof(struct request);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 5ed59ac6ae58..9df50fb507ca 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -84,6 +84,7 @@ static void ioc_destroy_icq(struct io_cq *icq)
* making it impossible to determine icq_cache. Record it in @icq.
*/
icq->__rcu_icq_cache = et->icq_cache;
+ icq->flags |= ICQ_DESTROYED;
call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}
@@ -212,15 +213,21 @@ static void __ioc_clear_queue(struct list_head *icq_list)
{
unsigned long flags;
+ rcu_read_lock();
while (!list_empty(icq_list)) {
struct io_cq *icq = list_entry(icq_list->next,
struct io_cq, q_node);
struct io_context *ioc = icq->ioc;
spin_lock_irqsave(&ioc->lock, flags);
+ if (icq->flags & ICQ_DESTROYED) {
+ spin_unlock_irqrestore(&ioc->lock, flags);
+ continue;
+ }
ioc_destroy_icq(icq);
spin_unlock_irqrestore(&ioc->lock, flags);
}
+ rcu_read_unlock();
}
/**
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 27ca68621137..db35ee682294 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -46,9 +46,6 @@
* If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
* device-specific coefficients.
*
- * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
- * device-specific coefficients.
- *
* 2. Control Strategy
*
* The device virtual time (vtime) is used as the primary control metric.
@@ -1318,7 +1315,7 @@ static bool iocg_is_idle(struct ioc_gq *iocg)
return false;
/* is something in flight? */
- if (atomic64_read(&iocg->done_vtime) < atomic64_read(&iocg->vtime))
+ if (atomic64_read(&iocg->done_vtime) != atomic64_read(&iocg->vtime))
return false;
return true;
diff --git a/block/blk-map.c b/block/blk-map.c
index b0790268ed9d..b72c361911a4 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -11,6 +11,514 @@
#include "blk.h"
+struct bio_map_data {
+ int is_our_pages;
+ struct iov_iter iter;
+ struct iovec iov[];
+};
+
+static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
+ gfp_t gfp_mask)
+{
+ struct bio_map_data *bmd;
+
+ if (data->nr_segs > UIO_MAXIOV)
+ return NULL;
+
+ bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
+ if (!bmd)
+ return NULL;
+ memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
+ bmd->iter = *data;
+ bmd->iter.iov = bmd->iov;
+ return bmd;
+}
+
+/**
+ * bio_copy_from_iter - copy all pages from iov_iter to bio
+ * @bio: The &struct bio which describes the I/O as destination
+ * @iter: iov_iter as source
+ *
+ * Copy all pages from iov_iter to bio.
+ * Returns 0 on success, or error on failure.
+ */
+static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
+{
+ struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bvec, bio, iter_all) {
+ ssize_t ret;
+
+ ret = copy_page_from_iter(bvec->bv_page,
+ bvec->bv_offset,
+ bvec->bv_len,
+ iter);
+
+ if (!iov_iter_count(iter))
+ break;
+
+ if (ret < bvec->bv_len)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * bio_copy_to_iter - copy all pages from bio to iov_iter
+ * @bio: The &struct bio which describes the I/O as source
+ * @iter: iov_iter as destination
+ *
+ * Copy all pages from bio to iov_iter.
+ * Returns 0 on success, or error on failure.
+ */
+static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
+{
+ struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bvec, bio, iter_all) {
+ ssize_t ret;
+
+ ret = copy_page_to_iter(bvec->bv_page,
+ bvec->bv_offset,
+ bvec->bv_len,
+ &iter);
+
+ if (!iov_iter_count(&iter))
+ break;
+
+ if (ret < bvec->bv_len)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * bio_uncopy_user - finish previously mapped bio
+ * @bio: bio being terminated
+ *
+ * Free pages allocated from bio_copy_user_iov() and write back data
+ * to user space in case of a read.
+ */
+static int bio_uncopy_user(struct bio *bio)
+{
+ struct bio_map_data *bmd = bio->bi_private;
+ int ret = 0;
+
+ if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
+ /*
+ * if we're in a workqueue, the request is orphaned, so
+ * don't copy into a random user address space, just free
+ * and return -EINTR so user space doesn't expect any data.
+ */
+ if (!current->mm)
+ ret = -EINTR;
+ else if (bio_data_dir(bio) == READ)
+ ret = bio_copy_to_iter(bio, bmd->iter);
+ if (bmd->is_our_pages)
+ bio_free_pages(bio);
+ }
+ kfree(bmd);
+ bio_put(bio);
+ return ret;
+}
+
+/**
+ * bio_copy_user_iov - copy user data to bio
+ * @q: destination block queue
+ * @map_data: pointer to the rq_map_data holding pages (if necessary)
+ * @iter: iovec iterator
+ * @gfp_mask: memory allocation flags
+ *
+ * Prepares and returns a bio for indirect user io, bouncing data
+ * to/from kernel pages as necessary. Must be paired with
+ * call bio_uncopy_user() on io completion.
+ */
+static struct bio *bio_copy_user_iov(struct request_queue *q,
+ struct rq_map_data *map_data, struct iov_iter *iter,
+ gfp_t gfp_mask)
+{
+ struct bio_map_data *bmd;
+ struct page *page;
+ struct bio *bio;
+ int i = 0, ret;
+ int nr_pages;
+ unsigned int len = iter->count;
+ unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
+
+ bmd = bio_alloc_map_data(iter, gfp_mask);
+ if (!bmd)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * We need to do a deep copy of the iov_iter including the iovecs.
+ * The caller provided iov might point to an on-stack or otherwise
+ * shortlived one.
+ */
+ bmd->is_our_pages = map_data ? 0 : 1;
+
+ nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+ if (nr_pages > BIO_MAX_PAGES)
+ nr_pages = BIO_MAX_PAGES;
+
+ ret = -ENOMEM;
+ bio = bio_kmalloc(gfp_mask, nr_pages);
+ if (!bio)
+ goto out_bmd;
+
+ ret = 0;
+
+ if (map_data) {
+ nr_pages = 1 << map_data->page_order;
+ i = map_data->offset / PAGE_SIZE;
+ }
+ while (len) {
+ unsigned int bytes = PAGE_SIZE;
+
+ bytes -= offset;
+
+ if (bytes > len)
+ bytes = len;
+
+ if (map_data) {
+ if (i == map_data->nr_entries * nr_pages) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ page = map_data->pages[i / nr_pages];
+ page += (i % nr_pages);
+
+ i++;
+ } else {
+ page = alloc_page(q->bounce_gfp | gfp_mask);
+ if (!page) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
+ if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
+ if (!map_data)
+ __free_page(page);
+ break;
+ }
+
+ len -= bytes;
+ offset = 0;
+ }
+
+ if (ret)
+ goto cleanup;
+
+ if (map_data)
+ map_data->offset += bio->bi_iter.bi_size;
+
+ /*
+ * success
+ */
+ if ((iov_iter_rw(iter) == WRITE &&
+ (!map_data || !map_data->null_mapped)) ||
+ (map_data && map_data->from_user)) {
+ ret = bio_copy_from_iter(bio, iter);
+ if (ret)
+ goto cleanup;
+ } else {
+ if (bmd->is_our_pages)
+ zero_fill_bio(bio);
+ iov_iter_advance(iter, bio->bi_iter.bi_size);
+ }
+
+ bio->bi_private = bmd;
+ if (map_data && map_data->null_mapped)
+ bio_set_flag(bio, BIO_NULL_MAPPED);
+ return bio;
+cleanup:
+ if (!map_data)
+ bio_free_pages(bio);
+ bio_put(bio);
+out_bmd:
+ kfree(bmd);
+ return ERR_PTR(ret);
+}
+
+/**
+ * bio_map_user_iov - map user iovec into bio
+ * @q: the struct request_queue for the bio
+ * @iter: iovec iterator
+ * @gfp_mask: memory allocation flags
+ *
+ * Map the user space address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+static struct bio *bio_map_user_iov(struct request_queue *q,
+ struct iov_iter *iter, gfp_t gfp_mask)
+{
+ int j;
+ struct bio *bio;
+ int ret;
+
+ if (!iov_iter_count(iter))
+ return ERR_PTR(-EINVAL);
+
+ bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
+ if (!bio)
+ return ERR_PTR(-ENOMEM);
+
+ while (iov_iter_count(iter)) {
+ struct page **pages;
+ ssize_t bytes;
+ size_t offs, added = 0;
+ int npages;
+
+ bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
+ if (unlikely(bytes <= 0)) {
+ ret = bytes ? bytes : -EFAULT;
+ goto out_unmap;
+ }
+
+ npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
+
+ if (unlikely(offs & queue_dma_alignment(q))) {
+ ret = -EINVAL;
+ j = 0;
+ } else {
+ for (j = 0; j < npages; j++) {
+ struct page *page = pages[j];
+ unsigned int n = PAGE_SIZE - offs;
+ bool same_page = false;
+
+ if (n > bytes)
+ n = bytes;
+
+ if (!__bio_add_pc_page(q, bio, page, n, offs,
+ &same_page)) {
+ if (same_page)
+ put_page(page);
+ break;
+ }
+
+ added += n;
+ bytes -= n;
+ offs = 0;
+ }
+ iov_iter_advance(iter, added);
+ }
+ /*
+ * release the pages we didn't map into the bio, if any
+ */
+ while (j < npages)
+ put_page(pages[j++]);
+ kvfree(pages);
+ /* couldn't stuff something into bio? */
+ if (bytes)
+ break;
+ }
+
+ bio_set_flag(bio, BIO_USER_MAPPED);
+
+ /*
+ * subtle -- if bio_map_user_iov() ended up bouncing a bio,
+ * it would normally disappear when its bi_end_io is run.
+ * however, we need it for the unmap, so grab an extra
+ * reference to it
+ */
+ bio_get(bio);
+ return bio;
+
+ out_unmap:
+ bio_release_pages(bio, false);
+ bio_put(bio);
+ return ERR_PTR(ret);
+}
+
+/**
+ * bio_unmap_user - unmap a bio
+ * @bio: the bio being unmapped
+ *
+ * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
+ * process context.
+ *
+ * bio_unmap_user() may sleep.
+ */
+static void bio_unmap_user(struct bio *bio)
+{
+ bio_release_pages(bio, bio_data_dir(bio) == READ);
+ bio_put(bio);
+ bio_put(bio);
+}
+
+static void bio_invalidate_vmalloc_pages(struct bio *bio)
+{
+#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+ if (bio->bi_private && !op_is_write(bio_op(bio))) {
+ unsigned long i, len = 0;
+
+ for (i = 0; i < bio->bi_vcnt; i++)
+ len += bio->bi_io_vec[i].bv_len;
+ invalidate_kernel_vmap_range(bio->bi_private, len);
+ }
+#endif
+}
+
+static void bio_map_kern_endio(struct bio *bio)
+{
+ bio_invalidate_vmalloc_pages(bio);
+ bio_put(bio);
+}
+
+/**
+ * bio_map_kern - map kernel address into bio
+ * @q: the struct request_queue for the bio
+ * @data: pointer to buffer to map
+ * @len: length in bytes
+ * @gfp_mask: allocation flags for bio allocation
+ *
+ * Map the kernel address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+static struct bio *bio_map_kern(struct request_queue *q, void *data,
+ unsigned int len, gfp_t gfp_mask)
+{
+ unsigned long kaddr = (unsigned long)data;
+ unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ unsigned long start = kaddr >> PAGE_SHIFT;
+ const int nr_pages = end - start;
+ bool is_vmalloc = is_vmalloc_addr(data);
+ struct page *page;
+ int offset, i;
+ struct bio *bio;
+
+ bio = bio_kmalloc(gfp_mask, nr_pages);
+ if (!bio)
+ return ERR_PTR(-ENOMEM);
+
+ if (is_vmalloc) {
+ flush_kernel_vmap_range(data, len);
+ bio->bi_private = data;
+ }
+
+ offset = offset_in_page(kaddr);
+ for (i = 0; i < nr_pages; i++) {
+ unsigned int bytes = PAGE_SIZE - offset;
+
+ if (len <= 0)
+ break;
+
+ if (bytes > len)
+ bytes = len;
+
+ if (!is_vmalloc)
+ page = virt_to_page(data);
+ else
+ page = vmalloc_to_page(data);
+ if (bio_add_pc_page(q, bio, page, bytes,
+ offset) < bytes) {
+ /* we don't support partial mappings */
+ bio_put(bio);
+ return ERR_PTR(-EINVAL);
+ }
+
+ data += bytes;
+ len -= bytes;
+ offset = 0;
+ }
+
+ bio->bi_end_io = bio_map_kern_endio;
+ return bio;
+}
+
+static void bio_copy_kern_endio(struct bio *bio)
+{
+ bio_free_pages(bio);
+ bio_put(bio);
+}
+
+static void bio_copy_kern_endio_read(struct bio *bio)
+{
+ char *p = bio->bi_private;
+ struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bvec, bio, iter_all) {
+ memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
+ p += bvec->bv_len;
+ }
+
+ bio_copy_kern_endio(bio);
+}
+
+/**
+ * bio_copy_kern - copy kernel address into bio
+ * @q: the struct request_queue for the bio
+ * @data: pointer to buffer to copy
+ * @len: length in bytes
+ * @gfp_mask: allocation flags for bio and page allocation
+ * @reading: data direction is READ
+ *
+ * copy the kernel address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+static struct bio *bio_copy_kern(struct request_queue *q, void *data,
+ unsigned int len, gfp_t gfp_mask, int reading)
+{
+ unsigned long kaddr = (unsigned long)data;
+ unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ unsigned long start = kaddr >> PAGE_SHIFT;
+ struct bio *bio;
+ void *p = data;
+ int nr_pages = 0;
+
+ /*
+ * Overflow, abort
+ */
+ if (end < start)
+ return ERR_PTR(-EINVAL);
+
+ nr_pages = end - start;
+ bio = bio_kmalloc(gfp_mask, nr_pages);
+ if (!bio)
+ return ERR_PTR(-ENOMEM);
+
+ while (len) {
+ struct page *page;
+ unsigned int bytes = PAGE_SIZE;
+
+ if (bytes > len)
+ bytes = len;
+
+ page = alloc_page(q->bounce_gfp | gfp_mask);
+ if (!page)
+ goto cleanup;
+
+ if (!reading)
+ memcpy(page_address(page), p, bytes);
+
+ if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
+ break;
+
+ len -= bytes;
+ p += bytes;
+ }
+
+ if (reading) {
+ bio->bi_end_io = bio_copy_kern_endio_read;
+ bio->bi_private = data;
+ } else {
+ bio->bi_end_io = bio_copy_kern_endio;
+ }
+
+ return bio;
+
+cleanup:
+ bio_free_pages(bio);
+ bio_put(bio);
+ return ERR_PTR(-ENOMEM);
+}
+
/*
* Append a bio to a passthrough request. Only works if the bio can be merged
* into the request based on the driver constraints.
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 856356b1619e..74cedea56034 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -398,6 +398,28 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
WARN_ON(e && (rq->tag != -1));
if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
+ /*
+ * Firstly normal IO request is inserted to scheduler queue or
+ * sw queue, meantime we add flush request to dispatch queue(
+ * hctx->dispatch) directly and there is at most one in-flight
+ * flush request for each hw queue, so it doesn't matter to add
+ * flush request to tail or front of the dispatch queue.
+ *
+ * Secondly in case of NCQ, flush request belongs to non-NCQ
+ * command, and queueing it will fail when there is any
+ * in-flight normal IO request(NCQ command). When adding flush
+ * rq to the front of hctx->dispatch, it is easier to introduce
+ * extra time to flush rq's latency because of S_SCHED_RESTART
+ * compared with adding to the tail of dispatch queue, then
+ * chance of flush merge is increased, and less flush requests
+ * will be issued to controller. It is observed that ~10% time
+ * is saved in blktests block/004 on disk attached to AHCI/NCQ
+ * drive when adding flush rq to the front of hctx->dispatch.
+ *
+ * Simply queue flush rq to the front of hctx->dispatch so that
+ * intensive flush workloads can benefit in case of NCQ HW.
+ */
+ at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
blk_mq_request_bypass_insert(rq, at_head, false);
goto run;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d92088dec6c3..f6291ceedee4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1178,6 +1178,23 @@ static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
#define BLK_MQ_RESOURCE_DELAY 3 /* ms units */
+static void blk_mq_handle_dev_resource(struct request *rq,
+ struct list_head *list)
+{
+ struct request *next =
+ list_first_entry_or_null(list, struct request, queuelist);
+
+ /*
+ * If an I/O scheduler has been configured and we got a driver tag for
+ * the next request already, free it.
+ */
+ if (next)
+ blk_mq_put_driver_tag(next);
+
+ list_add(&rq->queuelist, list);
+ __blk_mq_requeue_request(rq);
+}
+
/*
* Returns true if we did some work AND can potentially do more.
*/
@@ -1245,17 +1262,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
ret = q->mq_ops->queue_rq(hctx, &bd);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
- /*
- * If an I/O scheduler has been configured and we got a
- * driver tag for the next request already, free it
- * again.
- */
- if (!list_empty(list)) {
- nxt = list_first_entry(list, struct request, queuelist);
- blk_mq_put_driver_tag(nxt);
- }
- list_add(&rq->queuelist, list);
- __blk_mq_requeue_request(rq);
+ blk_mq_handle_dev_resource(rq, list);
break;
}
@@ -2409,8 +2416,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
- hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size,
- gfp);
+ hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp);
if (!hctx->fq)
goto free_bitmap;
@@ -2718,13 +2724,15 @@ void blk_mq_release(struct request_queue *q)
blk_mq_sysfs_deinit(q);
}
-struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
+struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
+ void *queuedata)
{
struct request_queue *uninit_q, *q;
- uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
+ uninit_q = __blk_alloc_queue(set->numa_node);
if (!uninit_q)
return ERR_PTR(-ENOMEM);
+ uninit_q->queuedata = queuedata;
/*
* Initialize the queue without an elevator. device_add_disk() will do
@@ -2736,6 +2744,12 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
return q;
}
+EXPORT_SYMBOL_GPL(blk_mq_init_queue_data);
+
+struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
+{
+ return blk_mq_init_queue_data(set, NULL);
+}
EXPORT_SYMBOL(blk_mq_init_queue);
/*
@@ -2824,7 +2838,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
memcpy(new_hctxs, hctxs, q->nr_hw_queues *
sizeof(*hctxs));
q->queue_hw_ctx = new_hctxs;
- q->nr_hw_queues = set->nr_hw_queues;
kfree(hctxs);
hctxs = new_hctxs;
}
@@ -2926,11 +2939,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
INIT_LIST_HEAD(&q->requeue_list);
spin_lock_init(&q->requeue_lock);
- blk_queue_make_request(q, blk_mq_make_request);
-
- /*
- * Do this after blk_queue_make_request() overrides it...
- */
+ q->make_request_fn = blk_mq_make_request;
q->nr_requests = set->queue_depth;
/*
@@ -3023,6 +3032,14 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
{
+ /*
+ * blk_mq_map_queues() and multiple .map_queues() implementations
+ * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the
+ * number of hardware queues.
+ */
+ if (set->nr_maps == 1)
+ set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues;
+
if (set->ops->map_queues && !is_kdump_kernel()) {
int i;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index c8eda2e7b91e..14397b4c4b53 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -87,42 +87,6 @@ void blk_set_stacking_limits(struct queue_limits *lim)
EXPORT_SYMBOL(blk_set_stacking_limits);
/**
- * blk_queue_make_request - define an alternate make_request function for a device
- * @q: the request queue for the device to be affected
- * @mfn: the alternate make_request function
- *
- * Description:
- * The normal way for &struct bios to be passed to a device
- * driver is for them to be collected into requests on a request
- * queue, and then to allow the device driver to select requests
- * off that queue when it is ready. This works well for many block
- * devices. However some block devices (typically virtual devices
- * such as md or lvm) do not benefit from the processing on the
- * request queue, and are served best by having the requests passed
- * directly to them. This can be achieved by providing a function
- * to blk_queue_make_request().
- *
- * Caveat:
- * The driver that does this *must* be able to deal appropriately
- * with buffers in "highmemory". This can be accomplished by either calling
- * kmap_atomic() to get a temporary kernel mapping, or by calling
- * blk_queue_bounce() to create a buffer in normal memory.
- **/
-void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
-{
- /*
- * set defaults
- */
- q->nr_requests = BLKDEV_MAX_RQ;
-
- q->make_request_fn = mfn;
- blk_queue_dma_alignment(q, 511);
-
- blk_set_default_limits(&q->limits);
-}
-EXPORT_SYMBOL(blk_queue_make_request);
-
-/**
* blk_queue_bounce_limit - set bounce buffer limit for queue
* @q: the request queue for the device
* @max_addr: the maximum address the device can handle
@@ -664,6 +628,9 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
top, bottom);
}
+
+ t->backing_dev_info->io_pages =
+ t->limits.max_sectors >> (PAGE_SHIFT - 9);
}
EXPORT_SYMBOL(disk_stack_limits);
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 05741c6f618b..f87956e0dcaf 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -20,6 +20,38 @@
#include "blk.h"
+#define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name
+static const char *const zone_cond_name[] = {
+ ZONE_COND_NAME(NOT_WP),
+ ZONE_COND_NAME(EMPTY),
+ ZONE_COND_NAME(IMP_OPEN),
+ ZONE_COND_NAME(EXP_OPEN),
+ ZONE_COND_NAME(CLOSED),
+ ZONE_COND_NAME(READONLY),
+ ZONE_COND_NAME(FULL),
+ ZONE_COND_NAME(OFFLINE),
+};
+#undef ZONE_COND_NAME
+
+/**
+ * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
+ * @zone_cond: BLK_ZONE_COND_XXX.
+ *
+ * Description: Centralize block layer function to convert BLK_ZONE_COND_XXX
+ * into string format. Useful in the debugging and tracing zone conditions. For
+ * invalid BLK_ZONE_COND_XXX it returns string "UNKNOWN".
+ */
+const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
+{
+ static const char *zone_cond_str = "UNKNOWN";
+
+ if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond])
+ zone_cond_str = zone_cond_name[zone_cond];
+
+ return zone_cond_str;
+}
+EXPORT_SYMBOL_GPL(blk_zone_cond_str);
+
static inline sector_t blk_zone_start(struct request_queue *q,
sector_t sector)
{
@@ -173,7 +205,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
if (!op_is_zone_mgmt(op))
return -EOPNOTSUPP;
- if (!nr_sectors || end_sector > capacity)
+ if (end_sector <= sector || end_sector > capacity)
/* Out of range */
return -EINVAL;
diff --git a/block/blk.h b/block/blk.h
index 0b8884353f6b..0a94ec68af32 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -4,6 +4,7 @@
#include <linux/idr.h>
#include <linux/blk-mq.h>
+#include <linux/part_stat.h>
#include <xen/xen.h>
#include "blk-mq.h"
#include "blk-mq-sched.h"
@@ -55,8 +56,8 @@ is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
return hctx->fq->flush_rq == req;
}
-struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
- int node, int cmd_size, gfp_t flags);
+struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
+ gfp_t flags);
void blk_free_flush_queue(struct blk_flush_queue *q);
void blk_freeze_queue(struct request_queue *q);
@@ -149,6 +150,9 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
bip_next->bip_vec[0].bv_offset);
}
+
+void blk_integrity_add(struct gendisk *);
+void blk_integrity_del(struct gendisk *);
#else /* CONFIG_BLK_DEV_INTEGRITY */
static inline bool integrity_req_gap_back_merge(struct request *req,
struct bio *next)
@@ -171,6 +175,12 @@ static inline bool bio_integrity_endio(struct bio *bio)
static inline void bio_integrity_free(struct bio *bio)
{
}
+static inline void blk_integrity_add(struct gendisk *disk)
+{
+}
+static inline void blk_integrity_del(struct gendisk *disk)
+{
+}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
unsigned long blk_rq_timeout(unsigned long timeout);
@@ -214,6 +224,17 @@ static inline void elevator_exit(struct request_queue *q,
struct hd_struct *__disk_get_part(struct gendisk *disk, int partno);
+ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ssize_t part_stat_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ssize_t part_fail_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ssize_t part_fail_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+
#ifdef CONFIG_FAIL_IO_TIMEOUT
int blk_should_fake_timeout(struct request_queue *);
ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
@@ -354,4 +375,117 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q);
static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
#endif
+void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
+ int rw);
+void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
+ int rw);
+void update_io_ticks(struct hd_struct *part, unsigned long now, bool end);
+struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector);
+
+int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
+void blk_free_devt(dev_t devt);
+void blk_invalidate_devt(dev_t devt);
+char *disk_name(struct gendisk *hd, int partno, char *buf);
+#define ADDPART_FLAG_NONE 0
+#define ADDPART_FLAG_RAID 1
+#define ADDPART_FLAG_WHOLEDISK 2
+struct hd_struct *__must_check add_partition(struct gendisk *disk, int partno,
+ sector_t start, sector_t len, int flags,
+ struct partition_meta_info *info);
+void __delete_partition(struct percpu_ref *ref);
+void delete_partition(struct gendisk *disk, int partno);
+int disk_expand_part_tbl(struct gendisk *disk, int target);
+
+static inline int hd_ref_init(struct hd_struct *part)
+{
+ if (percpu_ref_init(&part->ref, __delete_partition, 0,
+ GFP_KERNEL))
+ return -ENOMEM;
+ return 0;
+}
+
+static inline void hd_struct_get(struct hd_struct *part)
+{
+ percpu_ref_get(&part->ref);
+}
+
+static inline int hd_struct_try_get(struct hd_struct *part)
+{
+ return percpu_ref_tryget_live(&part->ref);
+}
+
+static inline void hd_struct_put(struct hd_struct *part)
+{
+ percpu_ref_put(&part->ref);
+}
+
+static inline void hd_struct_kill(struct hd_struct *part)
+{
+ percpu_ref_kill(&part->ref);
+}
+
+static inline void hd_free_part(struct hd_struct *part)
+{
+ free_part_stats(part);
+ kfree(part->info);
+ percpu_ref_exit(&part->ref);
+}
+
+/*
+ * Any access of part->nr_sects which is not protected by partition
+ * bd_mutex or gendisk bdev bd_mutex, should be done using this
+ * accessor function.
+ *
+ * Code written along the lines of i_size_read() and i_size_write().
+ * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption
+ * on.
+ */
+static inline sector_t part_nr_sects_read(struct hd_struct *part)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ sector_t nr_sects;
+ unsigned seq;
+ do {
+ seq = read_seqcount_begin(&part->nr_sects_seq);
+ nr_sects = part->nr_sects;
+ } while (read_seqcount_retry(&part->nr_sects_seq, seq));
+ return nr_sects;
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ sector_t nr_sects;
+
+ preempt_disable();
+ nr_sects = part->nr_sects;
+ preempt_enable();
+ return nr_sects;
+#else
+ return part->nr_sects;
+#endif
+}
+
+/*
+ * Should be called with mutex lock held (typically bd_mutex) of partition
+ * to provide mutual exlusion among writers otherwise seqcount might be
+ * left in wrong state leaving the readers spinning infinitely.
+ */
+static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ write_seqcount_begin(&part->nr_sects_seq);
+ part->nr_sects = size;
+ write_seqcount_end(&part->nr_sects_seq);
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
+ preempt_disable();
+ part->nr_sects = size;
+ preempt_enable();
+#else
+ part->nr_sects = size;
+#endif
+}
+
+struct request_queue *__blk_alloc_queue(int node_id);
+
+int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+ struct page *page, unsigned int len, unsigned int offset,
+ bool *same_page);
+
#endif /* BLK_INTERNAL_H */
diff --git a/block/genhd.c b/block/genhd.c
index ff6268970ddc..06b642b23a07 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -4,6 +4,7 @@
*/
#include <linux/module.h>
+#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/kdev_t.h>
@@ -26,7 +27,7 @@
#include "blk.h"
static DEFINE_MUTEX(block_class_lock);
-struct kobject *block_depr;
+static struct kobject *block_depr;
/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT (1 << MINORBITS)
@@ -46,6 +47,78 @@ static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);
+/*
+ * Set disk capacity and notify if the size is not currently
+ * zero and will not be set to zero
+ */
+void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size,
+ bool revalidate)
+{
+ sector_t capacity = get_capacity(disk);
+
+ set_capacity(disk, size);
+
+ if (revalidate)
+ revalidate_disk(disk);
+
+ if (capacity != size && capacity != 0 && size != 0) {
+ char *envp[] = { "RESIZE=1", NULL };
+
+ kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+ }
+}
+
+EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify);
+
+/*
+ * Format the device name of the indicated disk into the supplied buffer and
+ * return a pointer to that same buffer for convenience.
+ */
+char *disk_name(struct gendisk *hd, int partno, char *buf)
+{
+ if (!partno)
+ snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
+ else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
+ snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
+ else
+ snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
+
+ return buf;
+}
+
+const char *bdevname(struct block_device *bdev, char *buf)
+{
+ return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
+}
+EXPORT_SYMBOL(bdevname);
+
+#ifdef CONFIG_SMP
+static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
+{
+ int cpu;
+
+ memset(stat, 0, sizeof(struct disk_stats));
+ for_each_possible_cpu(cpu) {
+ struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu);
+ int group;
+
+ for (group = 0; group < NR_STAT_GROUPS; group++) {
+ stat->nsecs[group] += ptr->nsecs[group];
+ stat->sectors[group] += ptr->sectors[group];
+ stat->ios[group] += ptr->ios[group];
+ stat->merges[group] += ptr->merges[group];
+ }
+
+ stat->io_ticks += ptr->io_ticks;
+ }
+}
+#else /* CONFIG_SMP */
+static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
+{
+ memcpy(stat, &part->dkstats, sizeof(struct disk_stats));
+}
+#endif /* CONFIG_SMP */
+
void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
{
if (queue_is_mq(q))
@@ -66,7 +139,8 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
part_stat_local_dec(&part_to_disk(part)->part0, in_flight[rw]);
}
-unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part)
+static unsigned int part_in_flight(struct request_queue *q,
+ struct hd_struct *part)
{
int cpu;
unsigned int inflight;
@@ -86,8 +160,8 @@ unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part)
return inflight;
}
-void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2])
+static void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2])
{
int cpu;
@@ -143,7 +217,6 @@ struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
return part;
}
-EXPORT_SYMBOL_GPL(disk_get_part);
/**
* disk_part_iter_init - initialize partition iterator
@@ -299,7 +372,42 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
}
return &disk->part0;
}
-EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
+
+/**
+ * disk_has_partitions
+ * @disk: gendisk of interest
+ *
+ * Walk through the partition table and check if valid partition exists.
+ *
+ * CONTEXT:
+ * Don't care.
+ *
+ * RETURNS:
+ * True if the gendisk has at least one valid non-zero size partition.
+ * Otherwise false.
+ */
+bool disk_has_partitions(struct gendisk *disk)
+{
+ struct disk_part_tbl *ptbl;
+ int i;
+ bool ret = false;
+
+ rcu_read_lock();
+ ptbl = rcu_dereference(disk->part_tbl);
+
+ /* Iterate partitions skipping the whole device at index 0 */
+ for (i = 1; i < ptbl->len; i++) {
+ if (rcu_dereference(ptbl->part[i])) {
+ ret = true;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(disk_has_partitions);
/*
* Can be deleted altogether. Later.
@@ -908,7 +1016,6 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
}
return disk;
}
-EXPORT_SYMBOL(get_gendisk);
/**
* bdget_disk - do bdget() by gendisk and partition number
@@ -1154,6 +1261,67 @@ static ssize_t disk_ro_show(struct device *dev,
return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
}
+ssize_t part_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)part_nr_sects_read(p));
+}
+
+ssize_t part_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ struct request_queue *q = part_to_disk(p)->queue;
+ struct disk_stats stat;
+ unsigned int inflight;
+
+ part_stat_read_all(p, &stat);
+ inflight = part_in_flight(q, p);
+
+ return sprintf(buf,
+ "%8lu %8lu %8llu %8u "
+ "%8lu %8lu %8llu %8u "
+ "%8u %8u %8u "
+ "%8lu %8lu %8llu %8u "
+ "%8lu %8u"
+ "\n",
+ stat.ios[STAT_READ],
+ stat.merges[STAT_READ],
+ (unsigned long long)stat.sectors[STAT_READ],
+ (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
+ stat.ios[STAT_WRITE],
+ stat.merges[STAT_WRITE],
+ (unsigned long long)stat.sectors[STAT_WRITE],
+ (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
+ inflight,
+ jiffies_to_msecs(stat.io_ticks),
+ (unsigned int)div_u64(stat.nsecs[STAT_READ] +
+ stat.nsecs[STAT_WRITE] +
+ stat.nsecs[STAT_DISCARD] +
+ stat.nsecs[STAT_FLUSH],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_DISCARD],
+ stat.merges[STAT_DISCARD],
+ (unsigned long long)stat.sectors[STAT_DISCARD],
+ (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
+ stat.ios[STAT_FLUSH],
+ (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
+}
+
+ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ struct request_queue *q = part_to_disk(p)->queue;
+ unsigned int inflight[2];
+
+ part_in_flight_rw(q, p, inflight);
+ return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
+}
+
static ssize_t disk_capability_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1192,10 +1360,33 @@ static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
+
#ifdef CONFIG_FAIL_MAKE_REQUEST
+ssize_t part_fail_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+
+ return sprintf(buf, "%d\n", p->make_it_fail);
+}
+
+ssize_t part_fail_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ int i;
+
+ if (count > 0 && sscanf(buf, "%d", &i) > 0)
+ p->make_it_fail = (i == 0) ? 0 : 1;
+
+ return count;
+}
+
static struct device_attribute dev_attr_fail =
__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
-#endif
+#endif /* CONFIG_FAIL_MAKE_REQUEST */
+
#ifdef CONFIG_FAIL_IO_TIMEOUT
static struct device_attribute dev_attr_fail_timeout =
__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
@@ -1342,8 +1533,8 @@ static char *block_devnode(struct device *dev, umode_t *mode,
{
struct gendisk *disk = dev_to_disk(dev);
- if (disk->devnode)
- return disk->devnode(disk, mode);
+ if (disk->fops->devnode)
+ return disk->fops->devnode(disk, mode);
return NULL;
}
@@ -1369,6 +1560,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
struct hd_struct *hd;
char buf[BDEVNAME_SIZE];
unsigned int inflight;
+ struct disk_stats stat;
/*
if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
@@ -1380,7 +1572,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
+ part_stat_read_all(hd, &stat);
inflight = part_in_flight(gp->queue, hd);
+
seq_printf(seqf, "%4d %7d %s "
"%lu %lu %lu %u "
"%lu %lu %lu %u "
@@ -1390,23 +1584,31 @@ static int diskstats_show(struct seq_file *seqf, void *v)
"\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
disk_name(gp, hd->partno, buf),
- part_stat_read(hd, ios[STAT_READ]),
- part_stat_read(hd, merges[STAT_READ]),
- part_stat_read(hd, sectors[STAT_READ]),
- (unsigned int)part_stat_read_msecs(hd, STAT_READ),
- part_stat_read(hd, ios[STAT_WRITE]),
- part_stat_read(hd, merges[STAT_WRITE]),
- part_stat_read(hd, sectors[STAT_WRITE]),
- (unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
+ stat.ios[STAT_READ],
+ stat.merges[STAT_READ],
+ stat.sectors[STAT_READ],
+ (unsigned int)div_u64(stat.nsecs[STAT_READ],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_WRITE],
+ stat.merges[STAT_WRITE],
+ stat.sectors[STAT_WRITE],
+ (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
+ NSEC_PER_MSEC),
inflight,
- jiffies_to_msecs(part_stat_read(hd, io_ticks)),
- jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
- part_stat_read(hd, ios[STAT_DISCARD]),
- part_stat_read(hd, merges[STAT_DISCARD]),
- part_stat_read(hd, sectors[STAT_DISCARD]),
- (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD),
- part_stat_read(hd, ios[STAT_FLUSH]),
- (unsigned int)part_stat_read_msecs(hd, STAT_FLUSH)
+ jiffies_to_msecs(stat.io_ticks),
+ (unsigned int)div_u64(stat.nsecs[STAT_READ] +
+ stat.nsecs[STAT_WRITE] +
+ stat.nsecs[STAT_DISCARD] +
+ stat.nsecs[STAT_FLUSH],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_DISCARD],
+ stat.merges[STAT_DISCARD],
+ stat.sectors[STAT_DISCARD],
+ (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
+ NSEC_PER_MSEC),
+ stat.ios[STAT_FLUSH],
+ (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
+ NSEC_PER_MSEC)
);
}
disk_part_iter_exit(&piter);
@@ -1463,7 +1665,6 @@ dev_t blk_lookup_devt(const char *name, int partno)
class_dev_iter_exit(&iter);
return devt;
}
-EXPORT_SYMBOL(blk_lookup_devt);
struct gendisk *__alloc_disk_node(int minors, int node_id)
{
diff --git a/block/ioctl.c b/block/ioctl.c
index 127194b9f9bd..6e827de1a4c4 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -11,6 +11,7 @@
#include <linux/blktrace_api.h>
#include <linux/pr.h>
#include <linux/uaccess.h>
+#include "blk.h"
static int blkpg_do_ioctl(struct block_device *bdev,
struct blkpg_partition __user *upart, int op)
diff --git a/block/opal_proto.h b/block/opal_proto.h
index 325cbba2465f..b486b3ec7dc4 100644
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -36,6 +36,7 @@ enum opal_response_token {
#define DTAERROR_NO_METHOD_STATUS 0x89
#define GENERIC_HOST_SESSION_NUM 0x41
+#define FIRST_TPER_SESSION_NUM 4096
#define TPER_SYNC_SUPPORTED 0x01
#define MBR_ENABLED_MASK 0x10
diff --git a/block/partitions/Makefile b/block/partitions/Makefile
index 2f276b677c81..a7f05cdb02a8 100644
--- a/block/partitions/Makefile
+++ b/block/partitions/Makefile
@@ -3,8 +3,7 @@
# Makefile for the linux kernel.
#
-obj-$(CONFIG_BLOCK) := check.o
-
+obj-$(CONFIG_BLOCK) += core.o
obj-$(CONFIG_ACORN_PARTITION) += acorn.o
obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
obj-$(CONFIG_ATARI_PARTITION) += atari.o
diff --git a/block/partitions/acorn.c b/block/partitions/acorn.c
index 7587700fad4a..c64c57b958bf 100644
--- a/block/partitions/acorn.c
+++ b/block/partitions/acorn.c
@@ -11,7 +11,6 @@
#include <linux/adfs_fs.h>
#include "check.h"
-#include "acorn.h"
/*
* Partition types. (Oh for reusability)
diff --git a/block/partitions/acorn.h b/block/partitions/acorn.h
deleted file mode 100644
index 67b06601ca4c..000000000000
--- a/block/partitions/acorn.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/fs/partitions/acorn.h
- *
- * Copyright (C) 1996-2001 Russell King.
- *
- * I _hate_ this partitioning mess - why can't we have one defined
- * format, and everyone stick to it?
- */
-
-int adfspart_check_CUMANA(struct parsed_partitions *state);
-int adfspart_check_ADFS(struct parsed_partitions *state);
-int adfspart_check_ICS(struct parsed_partitions *state);
-int adfspart_check_POWERTEC(struct parsed_partitions *state);
-int adfspart_check_EESOX(struct parsed_partitions *state);
diff --git a/block/partitions/aix.c b/block/partitions/aix.c
index 903f3ed175d0..c7b4fd1a4a97 100644
--- a/block/partitions/aix.c
+++ b/block/partitions/aix.c
@@ -6,7 +6,6 @@
*/
#include "check.h"
-#include "aix.h"
struct lvm_rec {
char lvm_id[4]; /* "_LVM" */
diff --git a/block/partitions/aix.h b/block/partitions/aix.h
deleted file mode 100644
index b4449f0b9f2b..000000000000
--- a/block/partitions/aix.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-extern int aix_partition(struct parsed_partitions *state);
diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c
index 560936617d9c..9526491d9aed 100644
--- a/block/partitions/amiga.c
+++ b/block/partitions/amiga.c
@@ -14,7 +14,6 @@
#include <linux/affs_hardblocks.h>
#include "check.h"
-#include "amiga.h"
static __inline__ u32
checksum_block(__be32 *m, int size)
@@ -42,9 +41,8 @@ int amiga_partition(struct parsed_partitions *state)
goto rdb_done;
data = read_part_sector(state, blk, &sect);
if (!data) {
- if (warn_no_part)
- pr_err("Dev %s: unable to read RDB block %d\n",
- bdevname(state->bdev, b), blk);
+ pr_err("Dev %s: unable to read RDB block %d\n",
+ bdevname(state->bdev, b), blk);
res = -1;
goto rdb_done;
}
@@ -85,9 +83,8 @@ int amiga_partition(struct parsed_partitions *state)
blk *= blksize; /* Read in terms partition table understands */
data = read_part_sector(state, blk, &sect);
if (!data) {
- if (warn_no_part)
- pr_err("Dev %s: unable to read partition block %d\n",
- bdevname(state->bdev, b), blk);
+ pr_err("Dev %s: unable to read partition block %d\n",
+ bdevname(state->bdev, b), blk);
res = -1;
goto rdb_done;
}
diff --git a/block/partitions/amiga.h b/block/partitions/amiga.h
deleted file mode 100644
index 7e63f4d9d969..000000000000
--- a/block/partitions/amiga.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/amiga.h
- */
-
-int amiga_partition(struct parsed_partitions *state);
-
diff --git a/block/partitions/atari.h b/block/partitions/atari.h
index 01c2b9457394..678202442fd3 100644
--- a/block/partitions/atari.h
+++ b/block/partitions/atari.h
@@ -34,4 +34,3 @@ struct rootsector
u16 checksum; /* checksum for bootable disks */
} __packed;
-int atari_partition(struct parsed_partitions *state);
diff --git a/block/partitions/check.c b/block/partitions/check.c
deleted file mode 100644
index ffe408fead0c..000000000000
--- a/block/partitions/check.c
+++ /dev/null
@@ -1,198 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * fs/partitions/check.c
- *
- * Code extracted from drivers/block/genhd.c
- * Copyright (C) 1991-1998 Linus Torvalds
- * Re-organised Feb 1998 Russell King
- *
- * We now have independent partition support from the
- * block drivers, which allows all the partition code to
- * be grouped in one location, and it to be mostly self
- * contained.
- *
- * Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl}
- */
-
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/ctype.h>
-#include <linux/genhd.h>
-
-#include "check.h"
-
-#include "acorn.h"
-#include "amiga.h"
-#include "atari.h"
-#include "ldm.h"
-#include "mac.h"
-#include "msdos.h"
-#include "osf.h"
-#include "sgi.h"
-#include "sun.h"
-#include "ibm.h"
-#include "ultrix.h"
-#include "efi.h"
-#include "karma.h"
-#include "sysv68.h"
-#include "cmdline.h"
-
-int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/
-
-static int (*check_part[])(struct parsed_partitions *) = {
- /*
- * Probe partition formats with tables at disk address 0
- * that also have an ADFS boot block at 0xdc0.
- */
-#ifdef CONFIG_ACORN_PARTITION_ICS
- adfspart_check_ICS,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_POWERTEC
- adfspart_check_POWERTEC,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_EESOX
- adfspart_check_EESOX,
-#endif
-
- /*
- * Now move on to formats that only have partition info at
- * disk address 0xdc0. Since these may also have stale
- * PC/BIOS partition tables, they need to come before
- * the msdos entry.
- */
-#ifdef CONFIG_ACORN_PARTITION_CUMANA
- adfspart_check_CUMANA,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_ADFS
- adfspart_check_ADFS,
-#endif
-
-#ifdef CONFIG_CMDLINE_PARTITION
- cmdline_partition,
-#endif
-#ifdef CONFIG_EFI_PARTITION
- efi_partition, /* this must come before msdos */
-#endif
-#ifdef CONFIG_SGI_PARTITION
- sgi_partition,
-#endif
-#ifdef CONFIG_LDM_PARTITION
- ldm_partition, /* this must come before msdos */
-#endif
-#ifdef CONFIG_MSDOS_PARTITION
- msdos_partition,
-#endif
-#ifdef CONFIG_OSF_PARTITION
- osf_partition,
-#endif
-#ifdef CONFIG_SUN_PARTITION
- sun_partition,
-#endif
-#ifdef CONFIG_AMIGA_PARTITION
- amiga_partition,
-#endif
-#ifdef CONFIG_ATARI_PARTITION
- atari_partition,
-#endif
-#ifdef CONFIG_MAC_PARTITION
- mac_partition,
-#endif
-#ifdef CONFIG_ULTRIX_PARTITION
- ultrix_partition,
-#endif
-#ifdef CONFIG_IBM_PARTITION
- ibm_partition,
-#endif
-#ifdef CONFIG_KARMA_PARTITION
- karma_partition,
-#endif
-#ifdef CONFIG_SYSV68_PARTITION
- sysv68_partition,
-#endif
- NULL
-};
-
-static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
-{
- struct parsed_partitions *state;
- int nr;
-
- state = kzalloc(sizeof(*state), GFP_KERNEL);
- if (!state)
- return NULL;
-
- nr = disk_max_parts(hd);
- state->parts = vzalloc(array_size(nr, sizeof(state->parts[0])));
- if (!state->parts) {
- kfree(state);
- return NULL;
- }
-
- state->limit = nr;
-
- return state;
-}
-
-void free_partitions(struct parsed_partitions *state)
-{
- vfree(state->parts);
- kfree(state);
-}
-
-struct parsed_partitions *
-check_partition(struct gendisk *hd, struct block_device *bdev)
-{
- struct parsed_partitions *state;
- int i, res, err;
-
- state = allocate_partitions(hd);
- if (!state)
- return NULL;
- state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
- if (!state->pp_buf) {
- free_partitions(state);
- return NULL;
- }
- state->pp_buf[0] = '\0';
-
- state->bdev = bdev;
- disk_name(hd, 0, state->name);
- snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
- if (isdigit(state->name[strlen(state->name)-1]))
- sprintf(state->name, "p");
-
- i = res = err = 0;
- while (!res && check_part[i]) {
- memset(state->parts, 0, state->limit * sizeof(state->parts[0]));
- res = check_part[i++](state);
- if (res < 0) {
- /* We have hit an I/O error which we don't report now.
- * But record it, and let the others do their job.
- */
- err = res;
- res = 0;
- }
-
- }
- if (res > 0) {
- printk(KERN_INFO "%s", state->pp_buf);
-
- free_page((unsigned long)state->pp_buf);
- return state;
- }
- if (state->access_beyond_eod)
- err = -ENOSPC;
- if (err)
- /* The partition is unrecognized. So report I/O errors if there were any */
- res = err;
- if (res) {
- if (warn_no_part)
- strlcat(state->pp_buf,
- " unable to read partition table\n", PAGE_SIZE);
- printk(KERN_INFO "%s", state->pp_buf);
- }
-
- free_page((unsigned long)state->pp_buf);
- free_partitions(state);
- return ERR_PTR(res);
-}
diff --git a/block/partitions/check.h b/block/partitions/check.h
index 6042f769471a..c577e9ee67f0 100644
--- a/block/partitions/check.h
+++ b/block/partitions/check.h
@@ -2,6 +2,7 @@
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
+#include "../blk.h"
/*
* add_gd_partition adds a partitions details to the devices partition
@@ -23,19 +24,14 @@ struct parsed_partitions {
char *pp_buf;
};
-void free_partitions(struct parsed_partitions *state);
+typedef struct {
+ struct page *v;
+} Sector;
-struct parsed_partitions *
-check_partition(struct gendisk *, struct block_device *);
-
-static inline void *read_part_sector(struct parsed_partitions *state,
- sector_t n, Sector *p)
+void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p);
+static inline void put_dev_sector(Sector p)
{
- if (n >= get_capacity(state->bdev->bd_disk)) {
- state->access_beyond_eod = true;
- return NULL;
- }
- return read_dev_sector(state->bdev, n, p);
+ put_page(p.v);
}
static inline void
@@ -51,5 +47,24 @@ put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size)
}
}
-extern int warn_no_part;
-
+/* detection routines go here in alphabetical order: */
+int adfspart_check_ADFS(struct parsed_partitions *state);
+int adfspart_check_CUMANA(struct parsed_partitions *state);
+int adfspart_check_EESOX(struct parsed_partitions *state);
+int adfspart_check_ICS(struct parsed_partitions *state);
+int adfspart_check_POWERTEC(struct parsed_partitions *state);
+int aix_partition(struct parsed_partitions *state);
+int amiga_partition(struct parsed_partitions *state);
+int atari_partition(struct parsed_partitions *state);
+int cmdline_partition(struct parsed_partitions *state);
+int efi_partition(struct parsed_partitions *state);
+int ibm_partition(struct parsed_partitions *);
+int karma_partition(struct parsed_partitions *state);
+int ldm_partition(struct parsed_partitions *state);
+int mac_partition(struct parsed_partitions *state);
+int msdos_partition(struct parsed_partitions *state);
+int osf_partition(struct parsed_partitions *state);
+int sgi_partition(struct parsed_partitions *state);
+int sun_partition(struct parsed_partitions *state);
+int sysv68_partition(struct parsed_partitions *state);
+int ultrix_partition(struct parsed_partitions *state);
diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c
index f1edd5452249..8f545c36cde4 100644
--- a/block/partitions/cmdline.c
+++ b/block/partitions/cmdline.c
@@ -18,7 +18,6 @@
#include <linux/cmdline-parser.h>
#include "check.h"
-#include "cmdline.h"
static char *cmdline;
static struct cmdline_parts *bdev_parts;
diff --git a/block/partitions/cmdline.h b/block/partitions/cmdline.h
deleted file mode 100644
index e64a31636a1f..000000000000
--- a/block/partitions/cmdline.h
+++ /dev/null
@@ -1,3 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-int cmdline_partition(struct parsed_partitions *state);
diff --git a/block/partition-generic.c b/block/partitions/core.c
index 564fae77711d..b79c4513629b 100644
--- a/block/partition-generic.c
+++ b/block/partitions/core.c
@@ -1,75 +1,176 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Code extracted from drivers/block/genhd.c
- * Copyright (C) 1991-1998 Linus Torvalds
- * Re-organised Feb 1998 Russell King
- *
- * We now have independent partition support from the
- * block drivers, which allows all the partition code to
- * be grouped in one location, and it to be mostly self
- * contained.
+ * Copyright (C) 1991-1998 Linus Torvalds
+ * Re-organised Feb 1998 Russell King
*/
-
-#include <linux/init.h>
-#include <linux/module.h>
#include <linux/fs.h>
#include <linux/slab.h>
-#include <linux/kmod.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
+#include <linux/vmalloc.h>
#include <linux/blktrace_api.h>
+#include <linux/raid/detect.h>
+#include "check.h"
-#include "partitions/check.h"
+static int (*check_part[])(struct parsed_partitions *) = {
+ /*
+ * Probe partition formats with tables at disk address 0
+ * that also have an ADFS boot block at 0xdc0.
+ */
+#ifdef CONFIG_ACORN_PARTITION_ICS
+ adfspart_check_ICS,
+#endif
+#ifdef CONFIG_ACORN_PARTITION_POWERTEC
+ adfspart_check_POWERTEC,
+#endif
+#ifdef CONFIG_ACORN_PARTITION_EESOX
+ adfspart_check_EESOX,
+#endif
-#ifdef CONFIG_BLK_DEV_MD
-extern void md_autodetect_dev(dev_t dev);
+ /*
+ * Now move on to formats that only have partition info at
+ * disk address 0xdc0. Since these may also have stale
+ * PC/BIOS partition tables, they need to come before
+ * the msdos entry.
+ */
+#ifdef CONFIG_ACORN_PARTITION_CUMANA
+ adfspart_check_CUMANA,
+#endif
+#ifdef CONFIG_ACORN_PARTITION_ADFS
+ adfspart_check_ADFS,
#endif
-
-/*
- * disk_name() is used by partition check code and the genhd driver.
- * It formats the devicename of the indicated disk into
- * the supplied buffer (of size at least 32), and returns
- * a pointer to that same buffer (for convenience).
- */
-char *disk_name(struct gendisk *hd, int partno, char *buf)
+#ifdef CONFIG_CMDLINE_PARTITION
+ cmdline_partition,
+#endif
+#ifdef CONFIG_EFI_PARTITION
+ efi_partition, /* this must come before msdos */
+#endif
+#ifdef CONFIG_SGI_PARTITION
+ sgi_partition,
+#endif
+#ifdef CONFIG_LDM_PARTITION
+ ldm_partition, /* this must come before msdos */
+#endif
+#ifdef CONFIG_MSDOS_PARTITION
+ msdos_partition,
+#endif
+#ifdef CONFIG_OSF_PARTITION
+ osf_partition,
+#endif
+#ifdef CONFIG_SUN_PARTITION
+ sun_partition,
+#endif
+#ifdef CONFIG_AMIGA_PARTITION
+ amiga_partition,
+#endif
+#ifdef CONFIG_ATARI_PARTITION
+ atari_partition,
+#endif
+#ifdef CONFIG_MAC_PARTITION
+ mac_partition,
+#endif
+#ifdef CONFIG_ULTRIX_PARTITION
+ ultrix_partition,
+#endif
+#ifdef CONFIG_IBM_PARTITION
+ ibm_partition,
+#endif
+#ifdef CONFIG_KARMA_PARTITION
+ karma_partition,
+#endif
+#ifdef CONFIG_SYSV68_PARTITION
+ sysv68_partition,
+#endif
+ NULL
+};
+
+static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
{
- if (!partno)
- snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
- else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
- snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
- else
- snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
+ struct parsed_partitions *state;
+ int nr;
- return buf;
-}
+ state = kzalloc(sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return NULL;
-const char *bdevname(struct block_device *bdev, char *buf)
-{
- return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
-}
+ nr = disk_max_parts(hd);
+ state->parts = vzalloc(array_size(nr, sizeof(state->parts[0])));
+ if (!state->parts) {
+ kfree(state);
+ return NULL;
+ }
-EXPORT_SYMBOL(bdevname);
+ state->limit = nr;
-const char *bio_devname(struct bio *bio, char *buf)
-{
- return disk_name(bio->bi_disk, bio->bi_partno, buf);
+ return state;
}
-EXPORT_SYMBOL(bio_devname);
-/*
- * There's very little reason to use this, you should really
- * have a struct block_device just about everywhere and use
- * bdevname() instead.
- */
-const char *__bdevname(dev_t dev, char *buffer)
+static void free_partitions(struct parsed_partitions *state)
{
- scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)",
- MAJOR(dev), MINOR(dev));
- return buffer;
+ vfree(state->parts);
+ kfree(state);
}
-EXPORT_SYMBOL(__bdevname);
+static struct parsed_partitions *check_partition(struct gendisk *hd,
+ struct block_device *bdev)
+{
+ struct parsed_partitions *state;
+ int i, res, err;
+
+ state = allocate_partitions(hd);
+ if (!state)
+ return NULL;
+ state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!state->pp_buf) {
+ free_partitions(state);
+ return NULL;
+ }
+ state->pp_buf[0] = '\0';
+
+ state->bdev = bdev;
+ disk_name(hd, 0, state->name);
+ snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
+ if (isdigit(state->name[strlen(state->name)-1]))
+ sprintf(state->name, "p");
+
+ i = res = err = 0;
+ while (!res && check_part[i]) {
+ memset(state->parts, 0, state->limit * sizeof(state->parts[0]));
+ res = check_part[i++](state);
+ if (res < 0) {
+ /*
+ * We have hit an I/O error which we don't report now.
+ * But record it, and let the others do their job.
+ */
+ err = res;
+ res = 0;
+ }
+
+ }
+ if (res > 0) {
+ printk(KERN_INFO "%s", state->pp_buf);
+
+ free_page((unsigned long)state->pp_buf);
+ return state;
+ }
+ if (state->access_beyond_eod)
+ err = -ENOSPC;
+ /*
+ * The partition is unrecognized. So report I/O errors if there were any
+ */
+ if (err)
+ res = err;
+ if (res) {
+ strlcat(state->pp_buf,
+ " unable to read partition table\n", PAGE_SIZE);
+ printk(KERN_INFO "%s", state->pp_buf);
+ }
+
+ free_page((unsigned long)state->pp_buf);
+ free_partitions(state);
+ return ERR_PTR(res);
+}
static ssize_t part_partition_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -87,13 +188,6 @@ static ssize_t part_start_show(struct device *dev,
return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
}
-ssize_t part_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
-}
-
static ssize_t part_ro_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -115,74 +209,6 @@ static ssize_t part_discard_alignment_show(struct device *dev,
return sprintf(buf, "%u\n", p->discard_alignment);
}
-ssize_t part_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- struct request_queue *q = part_to_disk(p)->queue;
- unsigned int inflight;
-
- inflight = part_in_flight(q, p);
- return sprintf(buf,
- "%8lu %8lu %8llu %8u "
- "%8lu %8lu %8llu %8u "
- "%8u %8u %8u "
- "%8lu %8lu %8llu %8u "
- "%8lu %8u"
- "\n",
- part_stat_read(p, ios[STAT_READ]),
- part_stat_read(p, merges[STAT_READ]),
- (unsigned long long)part_stat_read(p, sectors[STAT_READ]),
- (unsigned int)part_stat_read_msecs(p, STAT_READ),
- part_stat_read(p, ios[STAT_WRITE]),
- part_stat_read(p, merges[STAT_WRITE]),
- (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
- (unsigned int)part_stat_read_msecs(p, STAT_WRITE),
- inflight,
- jiffies_to_msecs(part_stat_read(p, io_ticks)),
- jiffies_to_msecs(part_stat_read(p, time_in_queue)),
- part_stat_read(p, ios[STAT_DISCARD]),
- part_stat_read(p, merges[STAT_DISCARD]),
- (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
- (unsigned int)part_stat_read_msecs(p, STAT_DISCARD),
- part_stat_read(p, ios[STAT_FLUSH]),
- (unsigned int)part_stat_read_msecs(p, STAT_FLUSH));
-}
-
-ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- struct request_queue *q = part_to_disk(p)->queue;
- unsigned int inflight[2];
-
- part_in_flight_rw(q, p, inflight);
- return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
-}
-
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-ssize_t part_fail_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
-
- return sprintf(buf, "%d\n", p->make_it_fail);
-}
-
-ssize_t part_fail_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct hd_struct *p = dev_to_part(dev);
- int i;
-
- if (count > 0 && sscanf(buf, "%d", &i) > 0)
- p->make_it_fail = (i == 0) ? 0 : 1;
-
- return count;
-}
-#endif
-
static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
static DEVICE_ATTR(start, 0444, part_start_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
@@ -369,7 +395,9 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
p->policy = get_disk_ro(disk);
if (info) {
- struct partition_meta_info *pinfo = alloc_part_info(disk);
+ struct partition_meta_info *pinfo;
+
+ pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id);
if (!pinfo) {
err = -ENOMEM;
goto out_free_stats;
@@ -428,7 +456,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
return p;
out_free_info:
- free_part_info(p);
+ kfree(p->info);
out_free_stats:
free_part_stats(p);
out_free:
@@ -525,10 +553,10 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
return true;
}
-#ifdef CONFIG_BLK_DEV_MD
- if (state->parts[p].flags & ADDPART_FLAG_RAID)
+ if (IS_BUILTIN(CONFIG_BLK_DEV_MD) &&
+ (state->parts[p].flags & ADDPART_FLAG_RAID))
md_autodetect_dev(part_to_dev(part)->devt);
-#endif
+
return true;
}
@@ -602,22 +630,29 @@ out_free_state:
return ret;
}
-unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p)
{
- struct address_space *mapping = bdev->bd_inode->i_mapping;
+ struct address_space *mapping = state->bdev->bd_inode->i_mapping;
struct page *page;
- page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL);
- if (!IS_ERR(page)) {
- if (PageError(page))
- goto fail;
- p->v = page;
- return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9);
-fail:
- put_page(page);
+ if (n >= get_capacity(state->bdev->bd_disk)) {
+ state->access_beyond_eod = true;
+ return NULL;
}
+
+ page = read_mapping_page(mapping,
+ (pgoff_t)(n >> (PAGE_SHIFT - 9)), NULL);
+ if (IS_ERR(page))
+ goto out;
+ if (PageError(page))
+ goto out_put_page;
+
+ p->v = page;
+ return (unsigned char *)page_address(page) +
+ ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << SECTOR_SHIFT);
+out_put_page:
+ put_page(page);
+out:
p->v = NULL;
return NULL;
}
-
-EXPORT_SYMBOL(read_dev_sector);
diff --git a/block/partitions/efi.h b/block/partitions/efi.h
index 5fc62fd8d9a9..8cc2b88d0aa8 100644
--- a/block/partitions/efi.h
+++ b/block/partitions/efi.h
@@ -113,7 +113,4 @@ typedef struct _legacy_mbr {
__le16 signature;
} __packed legacy_mbr;
-/* Functions */
-extern int efi_partition(struct parsed_partitions *state);
-
#endif
diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c
index a5d480f807f3..073faa6a69b8 100644
--- a/block/partitions/ibm.c
+++ b/block/partitions/ibm.c
@@ -15,7 +15,6 @@
#include <asm/vtoc.h>
#include "check.h"
-#include "ibm.h"
union label_t {
diff --git a/block/partitions/ibm.h b/block/partitions/ibm.h
deleted file mode 100644
index 8bf13febb2b6..000000000000
--- a/block/partitions/ibm.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-int ibm_partition(struct parsed_partitions *);
diff --git a/block/partitions/karma.c b/block/partitions/karma.c
index 59812d705c3d..4d93512f4bd4 100644
--- a/block/partitions/karma.c
+++ b/block/partitions/karma.c
@@ -8,9 +8,10 @@
*/
#include "check.h"
-#include "karma.h"
#include <linux/compiler.h>
+#define KARMA_LABEL_MAGIC 0xAB56
+
int karma_partition(struct parsed_partitions *state)
{
int i;
diff --git a/block/partitions/karma.h b/block/partitions/karma.h
deleted file mode 100644
index 48e074d417fb..000000000000
--- a/block/partitions/karma.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/karma.h
- */
-
-#define KARMA_LABEL_MAGIC 0xAB56
-
-int karma_partition(struct parsed_partitions *state);
-
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index a2d97ee1908c..6fdfcb40c537 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -14,10 +14,10 @@
#include <linux/stringify.h>
#include <linux/kernel.h>
#include <linux/uuid.h>
+#include <linux/msdos_partition.h>
#include "ldm.h"
#include "check.h"
-#include "msdos.h"
/*
* ldm_debug/info/error/crit - Output an error message
@@ -493,7 +493,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state)
{
Sector sect;
u8 *data;
- struct partition *p;
+ struct msdos_partition *p;
int i;
bool result = false;
@@ -508,7 +508,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state)
if (*(__le16*) (data + 0x01FE) != cpu_to_le16 (MSDOS_LABEL_MAGIC))
goto out;
- p = (struct partition*)(data + 0x01BE);
+ p = (struct msdos_partition *)(data + 0x01BE);
for (i = 0; i < 4; i++, p++)
if (SYS_IND (p) == LDM_PARTITION) {
result = true;
diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h
index 1ca63e97bccc..841580af7f9b 100644
--- a/block/partitions/ldm.h
+++ b/block/partitions/ldm.h
@@ -193,7 +193,5 @@ struct ldmdb { /* Cache of the database */
struct list_head v_part;
};
-int ldm_partition(struct parsed_partitions *state);
-
#endif /* _FS_PT_LDM_H_ */
diff --git a/block/partitions/mac.h b/block/partitions/mac.h
index 453ed2964804..0e41c9da7532 100644
--- a/block/partitions/mac.h
+++ b/block/partitions/mac.h
@@ -42,4 +42,3 @@ struct mac_driver_desc {
/* ... more stuff */
};
-int mac_partition(struct parsed_partitions *state);
diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c
index 82c44f7df911..8f2fcc080264 100644
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -18,13 +18,18 @@
* Check partition table on IDE disks for common CHS translations
*
* Re-organised Feb 1998 Russell King
+ *
+ * BSD disklabel support by Yossi Gottlieb <yogo@math.tau.ac.il>
+ * updated by Marc Espie <Marc.Espie@openbsd.org>
+ *
+ * Unixware slices support by Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
+ * and Krzysztof G. Baranowski <kgb@knm.org.pl>
*/
#include <linux/msdos_fs.h>
+#include <linux/msdos_partition.h>
#include "check.h"
-#include "msdos.h"
#include "efi.h"
-#include "aix.h"
/*
* Many architectures don't like unaligned accesses, while
@@ -35,17 +40,17 @@
#define SYS_IND(p) get_unaligned(&p->sys_ind)
-static inline sector_t nr_sects(struct partition *p)
+static inline sector_t nr_sects(struct msdos_partition *p)
{
return (sector_t)get_unaligned_le32(&p->nr_sects);
}
-static inline sector_t start_sect(struct partition *p)
+static inline sector_t start_sect(struct msdos_partition *p)
{
return (sector_t)get_unaligned_le32(&p->start_sect);
}
-static inline int is_extended_partition(struct partition *p)
+static inline int is_extended_partition(struct msdos_partition *p)
{
return (SYS_IND(p) == DOS_EXTENDED_PARTITION ||
SYS_IND(p) == WIN98_EXTENDED_PARTITION ||
@@ -68,7 +73,7 @@ msdos_magic_present(unsigned char *p)
#define AIX_LABEL_MAGIC4 0xC1
static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
{
- struct partition *pt = (struct partition *) (p + 0x1be);
+ struct msdos_partition *pt = (struct msdos_partition *) (p + 0x1be);
Sector sect;
unsigned char *d;
int slot, ret = 0;
@@ -78,13 +83,19 @@ static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
p[2] == AIX_LABEL_MAGIC3 &&
p[3] == AIX_LABEL_MAGIC4))
return 0;
- /* Assume the partition table is valid if Linux partitions exists */
+
+ /*
+ * Assume the partition table is valid if Linux partitions exists.
+ * Note that old Solaris/x86 partitions use the same indicator as
+ * Linux swap partitions, so we consider that a Linux partition as
+ * well.
+ */
for (slot = 1; slot <= 4; slot++, pt++) {
- if (pt->sys_ind == LINUX_SWAP_PARTITION ||
- pt->sys_ind == LINUX_RAID_PARTITION ||
- pt->sys_ind == LINUX_DATA_PARTITION ||
- pt->sys_ind == LINUX_LVM_PARTITION ||
- is_extended_partition(pt))
+ if (pt->sys_ind == SOLARIS_X86_PARTITION ||
+ pt->sys_ind == LINUX_RAID_PARTITION ||
+ pt->sys_ind == LINUX_DATA_PARTITION ||
+ pt->sys_ind == LINUX_LVM_PARTITION ||
+ is_extended_partition(pt))
return 0;
}
d = read_part_sector(state, 7, &sect);
@@ -122,7 +133,7 @@ static void parse_extended(struct parsed_partitions *state,
sector_t first_sector, sector_t first_size,
u32 disksig)
{
- struct partition *p;
+ struct msdos_partition *p;
Sector sect;
unsigned char *data;
sector_t this_sector, this_size;
@@ -146,7 +157,7 @@ static void parse_extended(struct parsed_partitions *state,
if (!msdos_magic_present(data + 510))
goto done;
- p = (struct partition *) (data + 0x1be);
+ p = (struct msdos_partition *) (data + 0x1be);
/*
* Usually, the first entry is the real data partition,
@@ -210,6 +221,30 @@ done:
put_dev_sector(sect);
}
+#define SOLARIS_X86_NUMSLICE 16
+#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL)
+
+struct solaris_x86_slice {
+ __le16 s_tag; /* ID tag of partition */
+ __le16 s_flag; /* permission flags */
+ __le32 s_start; /* start sector no of partition */
+ __le32 s_size; /* # of blocks in partition */
+};
+
+struct solaris_x86_vtoc {
+ unsigned int v_bootinfo[3]; /* info needed by mboot */
+ __le32 v_sanity; /* to verify vtoc sanity */
+ __le32 v_version; /* layout version */
+ char v_volume[8]; /* volume name */
+ __le16 v_sectorsz; /* sector size in bytes */
+ __le16 v_nparts; /* number of partitions */
+ unsigned int v_reserved[10]; /* free space */
+ struct solaris_x86_slice
+ v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */
+ unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp */
+ char v_asciilabel[128]; /* for compatibility */
+};
+
/* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also
indicates linux swap. Be careful before believing this is Solaris. */
@@ -265,6 +300,54 @@ static void parse_solaris_x86(struct parsed_partitions *state,
#endif
}
+/* check against BSD src/sys/sys/disklabel.h for consistency */
+#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */
+#define BSD_MAXPARTITIONS 16
+#define OPENBSD_MAXPARTITIONS 16
+#define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */
+struct bsd_disklabel {
+ __le32 d_magic; /* the magic number */
+ __s16 d_type; /* drive type */
+ __s16 d_subtype; /* controller/d_type specific */
+ char d_typename[16]; /* type name, e.g. "eagle" */
+ char d_packname[16]; /* pack identifier */
+ __u32 d_secsize; /* # of bytes per sector */
+ __u32 d_nsectors; /* # of data sectors per track */
+ __u32 d_ntracks; /* # of tracks per cylinder */
+ __u32 d_ncylinders; /* # of data cylinders per unit */
+ __u32 d_secpercyl; /* # of data sectors per cylinder */
+ __u32 d_secperunit; /* # of data sectors per unit */
+ __u16 d_sparespertrack; /* # of spare sectors per track */
+ __u16 d_sparespercyl; /* # of spare sectors per cylinder */
+ __u32 d_acylinders; /* # of alt. cylinders per unit */
+ __u16 d_rpm; /* rotational speed */
+ __u16 d_interleave; /* hardware sector interleave */
+ __u16 d_trackskew; /* sector 0 skew, per track */
+ __u16 d_cylskew; /* sector 0 skew, per cylinder */
+ __u32 d_headswitch; /* head switch time, usec */
+ __u32 d_trkseek; /* track-to-track seek, usec */
+ __u32 d_flags; /* generic flags */
+#define NDDATA 5
+ __u32 d_drivedata[NDDATA]; /* drive-type specific information */
+#define NSPARE 5
+ __u32 d_spare[NSPARE]; /* reserved for future use */
+ __le32 d_magic2; /* the magic number (again) */
+ __le16 d_checksum; /* xor of data incl. partitions */
+
+ /* filesystem and partition information: */
+ __le16 d_npartitions; /* number of partitions in following */
+ __le32 d_bbsize; /* size of boot area at sn0, bytes */
+ __le32 d_sbsize; /* max size of fs superblock, bytes */
+ struct bsd_partition { /* the partition table */
+ __le32 p_size; /* number of sectors in partition */
+ __le32 p_offset; /* starting sector */
+ __le32 p_fsize; /* filesystem basic fragment size */
+ __u8 p_fstype; /* filesystem type, see below */
+ __u8 p_frag; /* filesystem fragments per block */
+ __le16 p_cpg; /* filesystem cylinders per group */
+ } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */
+};
+
#if defined(CONFIG_BSD_DISKLABEL)
/*
* Create devices for BSD partitions listed in a disklabel, under a
@@ -349,6 +432,51 @@ static void parse_openbsd(struct parsed_partitions *state,
#endif
}
+#define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */
+#define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */
+#define UNIXWARE_NUMSLICE 16
+#define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */
+
+struct unixware_slice {
+ __le16 s_label; /* label */
+ __le16 s_flags; /* permission flags */
+ __le32 start_sect; /* starting sector */
+ __le32 nr_sects; /* number of sectors in slice */
+};
+
+struct unixware_disklabel {
+ __le32 d_type; /* drive type */
+ __le32 d_magic; /* the magic number */
+ __le32 d_version; /* version number */
+ char d_serial[12]; /* serial number of the device */
+ __le32 d_ncylinders; /* # of data cylinders per device */
+ __le32 d_ntracks; /* # of tracks per cylinder */
+ __le32 d_nsectors; /* # of data sectors per track */
+ __le32 d_secsize; /* # of bytes per sector */
+ __le32 d_part_start; /* # of first sector of this partition*/
+ __le32 d_unknown1[12]; /* ? */
+ __le32 d_alt_tbl; /* byte offset of alternate table */
+ __le32 d_alt_len; /* byte length of alternate table */
+ __le32 d_phys_cyl; /* # of physical cylinders per device */
+ __le32 d_phys_trk; /* # of physical tracks per cylinder */
+ __le32 d_phys_sec; /* # of physical sectors per track */
+ __le32 d_phys_bytes; /* # of physical bytes per sector */
+ __le32 d_unknown2; /* ? */
+ __le32 d_unknown3; /* ? */
+ __le32 d_pad[8]; /* pad */
+
+ struct unixware_vtoc {
+ __le32 v_magic; /* the magic number */
+ __le32 v_version; /* version number */
+ char v_name[8]; /* volume name */
+ __le16 v_nslices; /* # of slices */
+ __le16 v_unknown1; /* ? */
+ __le32 v_reserved[10]; /* reserved */
+ struct unixware_slice
+ v_slice[UNIXWARE_NUMSLICE]; /* slice headers */
+ } vtoc;
+}; /* 408 */
+
/*
* Create devices for Unixware partitions listed in a disklabel, under a
* dos-like partition. See parse_extended() for more information.
@@ -392,6 +520,8 @@ static void parse_unixware(struct parsed_partitions *state,
#endif
}
+#define MINIX_NR_SUBPARTITIONS 4
+
/*
* Minix 2.0.0/2.0.2 subpartition support.
* Anand Krishnamurthy <anandk@wiproge.med.ge.com>
@@ -403,14 +533,14 @@ static void parse_minix(struct parsed_partitions *state,
#ifdef CONFIG_MINIX_SUBPARTITION
Sector sect;
unsigned char *data;
- struct partition *p;
+ struct msdos_partition *p;
int i;
data = read_part_sector(state, offset, &sect);
if (!data)
return;
- p = (struct partition *)(data + 0x1be);
+ p = (struct msdos_partition *)(data + 0x1be);
/* The first sector of a Minix partition can have either
* a secondary MBR describing its subpartitions, or
@@ -454,7 +584,7 @@ int msdos_partition(struct parsed_partitions *state)
sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
Sector sect;
unsigned char *data;
- struct partition *p;
+ struct msdos_partition *p;
struct fat_boot_sector *fb;
int slot;
u32 disksig;
@@ -488,7 +618,7 @@ int msdos_partition(struct parsed_partitions *state)
* partition table. Reject this in case the boot indicator
* is not 0 or 0x80.
*/
- p = (struct partition *) (data + 0x1be);
+ p = (struct msdos_partition *) (data + 0x1be);
for (slot = 1; slot <= 4; slot++, p++) {
if (p->boot_ind != 0 && p->boot_ind != 0x80) {
/*
@@ -510,7 +640,7 @@ int msdos_partition(struct parsed_partitions *state)
}
#ifdef CONFIG_EFI_PARTITION
- p = (struct partition *) (data + 0x1be);
+ p = (struct msdos_partition *) (data + 0x1be);
for (slot = 1 ; slot <= 4 ; slot++, p++) {
/* If this is an EFI GPT disk, msdos should ignore it. */
if (SYS_IND(p) == EFI_PMBR_OSTYPE_EFI_GPT) {
@@ -519,7 +649,7 @@ int msdos_partition(struct parsed_partitions *state)
}
}
#endif
- p = (struct partition *) (data + 0x1be);
+ p = (struct msdos_partition *) (data + 0x1be);
disksig = le32_to_cpup((__le32 *)(data + 0x1b8));
@@ -566,7 +696,7 @@ int msdos_partition(struct parsed_partitions *state)
strlcat(state->pp_buf, "\n", PAGE_SIZE);
/* second pass - output for each on a separate line */
- p = (struct partition *) (0x1be + data);
+ p = (struct msdos_partition *) (0x1be + data);
for (slot = 1 ; slot <= 4 ; slot++, p++) {
unsigned char id = SYS_IND(p);
int n;
diff --git a/block/partitions/msdos.h b/block/partitions/msdos.h
deleted file mode 100644
index fcacfc486092..000000000000
--- a/block/partitions/msdos.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/msdos.h
- */
-
-#define MSDOS_LABEL_MAGIC 0xAA55
-
-int msdos_partition(struct parsed_partitions *state);
-
diff --git a/block/partitions/osf.c b/block/partitions/osf.c
index 4b873973d6c0..84560d0765ed 100644
--- a/block/partitions/osf.c
+++ b/block/partitions/osf.c
@@ -9,9 +9,9 @@
*/
#include "check.h"
-#include "osf.h"
#define MAX_OSF_PARTITIONS 18
+#define DISKLABELMAGIC (0x82564557UL)
int osf_partition(struct parsed_partitions *state)
{
diff --git a/block/partitions/osf.h b/block/partitions/osf.h
deleted file mode 100644
index 4d8088e7ea8c..000000000000
--- a/block/partitions/osf.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/osf.h
- */
-
-#define DISKLABELMAGIC (0x82564557UL)
-
-int osf_partition(struct parsed_partitions *state);
diff --git a/block/partitions/sgi.c b/block/partitions/sgi.c
index d7b421c6e530..4273f1bb0515 100644
--- a/block/partitions/sgi.c
+++ b/block/partitions/sgi.c
@@ -6,7 +6,12 @@
*/
#include "check.h"
-#include "sgi.h"
+
+#define SGI_LABEL_MAGIC 0x0be5a941
+
+enum {
+ LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
+};
struct sgi_disklabel {
__be32 magic_mushroom; /* Big fat spliff... */
diff --git a/block/partitions/sgi.h b/block/partitions/sgi.h
deleted file mode 100644
index a5b77c3987cf..000000000000
--- a/block/partitions/sgi.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/sgi.h
- */
-
-extern int sgi_partition(struct parsed_partitions *state);
-
-#define SGI_LABEL_MAGIC 0x0be5a941
-
diff --git a/block/partitions/sun.c b/block/partitions/sun.c
index 90f36724e796..47dc53eccf77 100644
--- a/block/partitions/sun.c
+++ b/block/partitions/sun.c
@@ -9,7 +9,14 @@
*/
#include "check.h"
-#include "sun.h"
+
+#define SUN_LABEL_MAGIC 0xDABE
+#define SUN_VTOC_SANITY 0x600DDEEE
+
+enum {
+ SUN_WHOLE_DISK = 5,
+ LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
+};
int sun_partition(struct parsed_partitions *state)
{
diff --git a/block/partitions/sun.h b/block/partitions/sun.h
deleted file mode 100644
index ae1b9eed3fd7..000000000000
--- a/block/partitions/sun.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/sun.h
- */
-
-#define SUN_LABEL_MAGIC 0xDABE
-#define SUN_VTOC_SANITY 0x600DDEEE
-
-int sun_partition(struct parsed_partitions *state);
diff --git a/block/partitions/sysv68.c b/block/partitions/sysv68.c
index 92e810826b01..6f6257fd4eb4 100644
--- a/block/partitions/sysv68.c
+++ b/block/partitions/sysv68.c
@@ -6,7 +6,6 @@
*/
#include "check.h"
-#include "sysv68.h"
/*
* Volume ID structure: on first 256-bytes sector of disk
diff --git a/block/partitions/sysv68.h b/block/partitions/sysv68.h
deleted file mode 100644
index 4fb6b8ec78ae..000000000000
--- a/block/partitions/sysv68.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-extern int sysv68_partition(struct parsed_partitions *state);
diff --git a/block/partitions/ultrix.c b/block/partitions/ultrix.c
index ecd0d7346c3d..4aaa81043ca0 100644
--- a/block/partitions/ultrix.c
+++ b/block/partitions/ultrix.c
@@ -8,7 +8,6 @@
*/
#include "check.h"
-#include "ultrix.h"
int ultrix_partition(struct parsed_partitions *state)
{
diff --git a/block/partitions/ultrix.h b/block/partitions/ultrix.h
deleted file mode 100644
index 9f676cead222..000000000000
--- a/block/partitions/ultrix.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fs/partitions/ultrix.h
- */
-
-int ultrix_partition(struct parsed_partitions *state);
diff --git a/block/sed-opal.c b/block/sed-opal.c
index 880cc57a5f6b..daafadbb88ca 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -1056,7 +1056,7 @@ static int start_opal_session_cont(struct opal_dev *dev)
hsn = response_get_u64(&dev->parsed, 4);
tsn = response_get_u64(&dev->parsed, 5);
- if (hsn == 0 && tsn == 0) {
+ if (hsn != GENERIC_HOST_SESSION_NUM || tsn < FIRST_TPER_SESSION_NUM) {
pr_debug("Couldn't authenticate session\n");
return -EPERM;
}