summaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/backing-dev-defs.h2
-rw-r--r--include/linux/bio.h49
-rw-r--r--include/linux/blk-cgroup.h11
-rw-r--r--include/linux/blk-mq.h11
-rw-r--r--include/linux/blk_types.h188
-rw-r--r--include/linux/blkdev.h231
-rw-r--r--include/linux/blktrace_api.h2
-rw-r--r--include/linux/ceph/messenger.h2
-rw-r--r--include/linux/dm-io.h2
-rw-r--r--include/linux/elevator.h9
-rw-r--r--include/linux/fs.h68
-rw-r--r--include/linux/kernel.h4
-rw-r--r--include/linux/lightnvm.h244
-rw-r--r--include/linux/nvme-fc-driver.h851
-rw-r--r--include/linux/nvme-fc.h268
-rw-r--r--include/linux/nvme.h43
-rw-r--r--include/linux/parser.h1
-rw-r--r--include/linux/swap.h3
-rw-r--r--include/linux/uio.h2
-rw-r--r--include/linux/writeback.h13
20 files changed, 1657 insertions, 347 deletions
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 0b5b1af35e5e..e850e76acaaf 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -116,6 +116,8 @@ struct bdi_writeback {
struct list_head work_list;
struct delayed_work dwork; /* work item used for writeback */
+ unsigned long dirty_sleep; /* last wait */
+
struct list_head bdi_node; /* anchored at bdi->wb_list */
#ifdef CONFIG_CGROUP_WRITEBACK
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 97cb48f03dc7..7cf8a6c70a3f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -63,6 +63,12 @@
#define bio_end_sector(bio) ((bio)->bi_iter.bi_sector + bio_sectors((bio)))
/*
+ * Return the data direction, READ or WRITE.
+ */
+#define bio_data_dir(bio) \
+ (op_is_write(bio_op(bio)) ? WRITE : READ)
+
+/*
* Check whether this bio carries any data or not. A NULL bio is allowed.
*/
static inline bool bio_has_data(struct bio *bio)
@@ -70,7 +76,8 @@ static inline bool bio_has_data(struct bio *bio)
if (bio &&
bio->bi_iter.bi_size &&
bio_op(bio) != REQ_OP_DISCARD &&
- bio_op(bio) != REQ_OP_SECURE_ERASE)
+ bio_op(bio) != REQ_OP_SECURE_ERASE &&
+ bio_op(bio) != REQ_OP_WRITE_ZEROES)
return true;
return false;
@@ -80,18 +87,8 @@ static inline bool bio_no_advance_iter(struct bio *bio)
{
return bio_op(bio) == REQ_OP_DISCARD ||
bio_op(bio) == REQ_OP_SECURE_ERASE ||
- bio_op(bio) == REQ_OP_WRITE_SAME;
-}
-
-static inline bool bio_is_rw(struct bio *bio)
-{
- if (!bio_has_data(bio))
- return false;
-
- if (bio_no_advance_iter(bio))
- return false;
-
- return true;
+ bio_op(bio) == REQ_OP_WRITE_SAME ||
+ bio_op(bio) == REQ_OP_WRITE_ZEROES;
}
static inline bool bio_mergeable(struct bio *bio)
@@ -193,18 +190,20 @@ static inline unsigned bio_segments(struct bio *bio)
struct bvec_iter iter;
/*
- * We special case discard/write same, because they interpret bi_size
- * differently:
+ * We special case discard/write same/write zeroes, because they
+ * interpret bi_size differently:
*/
- if (bio_op(bio) == REQ_OP_DISCARD)
- return 1;
-
- if (bio_op(bio) == REQ_OP_SECURE_ERASE)
- return 1;
-
- if (bio_op(bio) == REQ_OP_WRITE_SAME)
+ switch (bio_op(bio)) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_ZEROES:
+ return 0;
+ case REQ_OP_WRITE_SAME:
return 1;
+ default:
+ break;
+ }
bio_for_each_segment(bv, bio, iter)
segs++;
@@ -409,6 +408,8 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
}
+extern blk_qc_t submit_bio(struct bio *);
+
extern void bio_endio(struct bio *);
static inline void bio_io_error(struct bio *bio)
@@ -423,13 +424,15 @@ extern int bio_phys_segments(struct request_queue *, struct bio *);
extern int submit_bio_wait(struct bio *bio);
extern void bio_advance(struct bio *, unsigned);
-extern void bio_init(struct bio *);
+extern void bio_init(struct bio *bio, struct bio_vec *table,
+ unsigned short max_vecs);
extern void bio_reset(struct bio *);
void bio_chain(struct bio *, struct bio *);
extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
unsigned int, unsigned int);
+int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
struct rq_map_data;
extern struct bio *bio_map_user_iov(struct request_queue *,
const struct iov_iter *, gfp_t);
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 3bf5d33800ab..01b62e7bac74 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -581,15 +581,14 @@ static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
/**
* blkg_rwstat_add - add a value to a blkg_rwstat
* @rwstat: target blkg_rwstat
- * @op: REQ_OP
- * @op_flags: rq_flag_bits
+ * @op: REQ_OP and flags
* @val: value to add
*
* Add @val to @rwstat. The counters are chosen according to @rw. The
* caller is responsible for synchronizing calls to this function.
*/
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
- int op, int op_flags, uint64_t val)
+ unsigned int op, uint64_t val)
{
struct percpu_counter *cnt;
@@ -600,7 +599,7 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
- if (op_flags & REQ_SYNC)
+ if (op_is_sync(op))
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
else
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
@@ -705,9 +704,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
if (!throtl) {
blkg = blkg ?: q->root_blkg;
- blkg_rwstat_add(&blkg->stat_bytes, bio_op(bio), bio->bi_opf,
+ blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
bio->bi_iter.bi_size);
- blkg_rwstat_add(&blkg->stat_ios, bio_op(bio), bio->bi_opf, 1);
+ blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
}
rcu_read_unlock();
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 535ab2e13d2e..87e404aae267 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -3,6 +3,7 @@
#include <linux/blkdev.h>
#include <linux/sbitmap.h>
+#include <linux/srcu.h>
struct blk_mq_tags;
struct blk_flush_queue;
@@ -35,6 +36,8 @@ struct blk_mq_hw_ctx {
struct blk_mq_tags *tags;
+ struct srcu_struct queue_rq_srcu;
+
unsigned long queued;
unsigned long run;
#define BLK_MQ_MAX_DISPATCH_ORDER 7
@@ -215,18 +218,20 @@ void blk_mq_start_request(struct request *rq);
void blk_mq_end_request(struct request *rq, int error);
void __blk_mq_end_request(struct request *rq, int error);
-void blk_mq_requeue_request(struct request *rq);
-void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
-void blk_mq_cancel_requeue_work(struct request_queue *q);
+void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
+void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
+ bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
void blk_mq_abort_requeue_list(struct request_queue *q);
void blk_mq_complete_request(struct request *rq, int error);
+bool blk_mq_queue_stopped(struct request_queue *q);
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_stop_hw_queues(struct request_queue *q);
void blk_mq_start_hw_queues(struct request_queue *q);
+void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index cd395ecec99d..519ea2c9df61 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -17,7 +17,6 @@ struct io_context;
struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
-#ifdef CONFIG_BLOCK
/*
* main unit of I/O for the block layer and lower layers (ie drivers and
* stacking drivers)
@@ -88,24 +87,6 @@ struct bio {
struct bio_vec bi_inline_vecs[0];
};
-#define BIO_OP_SHIFT (8 * FIELD_SIZEOF(struct bio, bi_opf) - REQ_OP_BITS)
-#define bio_flags(bio) ((bio)->bi_opf & ((1 << BIO_OP_SHIFT) - 1))
-#define bio_op(bio) ((bio)->bi_opf >> BIO_OP_SHIFT)
-
-#define bio_set_op_attrs(bio, op, op_flags) do { \
- if (__builtin_constant_p(op)) \
- BUILD_BUG_ON((op) + 0U >= (1U << REQ_OP_BITS)); \
- else \
- WARN_ON_ONCE((op) + 0U >= (1U << REQ_OP_BITS)); \
- if (__builtin_constant_p(op_flags)) \
- BUILD_BUG_ON((op_flags) + 0U >= (1U << BIO_OP_SHIFT)); \
- else \
- WARN_ON_ONCE((op_flags) + 0U >= (1U << BIO_OP_SHIFT)); \
- (bio)->bi_opf = bio_flags(bio); \
- (bio)->bi_opf |= (((op) + 0U) << BIO_OP_SHIFT); \
- (bio)->bi_opf |= (op_flags); \
-} while (0)
-
#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs)
/*
@@ -119,6 +100,8 @@ struct bio {
#define BIO_QUIET 6 /* Make BIO Quiet */
#define BIO_CHAIN 7 /* chained bio, ->bi_remaining in effect */
#define BIO_REFFED 8 /* bio has elevated ->bi_cnt */
+#define BIO_THROTTLED 9 /* This bio has already been subjected to
+ * throttling rules. Don't do it again. */
/*
* Flags starting here get preserved by bio_reset() - this includes
@@ -142,53 +125,61 @@ struct bio {
#define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS)
#define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET)
-#endif /* CONFIG_BLOCK */
-
/*
- * Request flags. For use in the cmd_flags field of struct request, and in
- * bi_opf of struct bio. Note that some flags are only valid in either one.
+ * Operations and flags common to the bio and request structures.
+ * We use 8 bits for encoding the operation, and the remaining 24 for flags.
+ *
+ * The least significant bit of the operation number indicates the data
+ * transfer direction:
+ *
+ * - if the least significant bit is set transfers are TO the device
+ * - if the least significant bit is not set transfers are FROM the device
+ *
+ * If a operation does not transfer data the least significant bit has no
+ * meaning.
*/
-enum rq_flag_bits {
- /* common flags */
- __REQ_FAILFAST_DEV, /* no driver retries of device errors */
+#define REQ_OP_BITS 8
+#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1)
+#define REQ_FLAG_BITS 24
+
+enum req_opf {
+ /* read sectors from the device */
+ REQ_OP_READ = 0,
+ /* write sectors to the device */
+ REQ_OP_WRITE = 1,
+ /* flush the volatile write cache */
+ REQ_OP_FLUSH = 2,
+ /* discard sectors */
+ REQ_OP_DISCARD = 3,
+ /* get zone information */
+ REQ_OP_ZONE_REPORT = 4,
+ /* securely erase sectors */
+ REQ_OP_SECURE_ERASE = 5,
+ /* seset a zone write pointer */
+ REQ_OP_ZONE_RESET = 6,
+ /* write the same sector many times */
+ REQ_OP_WRITE_SAME = 7,
+ /* write the zero filled sector many times */
+ REQ_OP_WRITE_ZEROES = 8,
+
+ REQ_OP_LAST,
+};
+
+enum req_flag_bits {
+ __REQ_FAILFAST_DEV = /* no driver retries of device errors */
+ REQ_OP_BITS,
__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
__REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */
-
__REQ_SYNC, /* request is sync (sync write or read) */
__REQ_META, /* metadata io request */
__REQ_PRIO, /* boost priority in cfq */
-
- __REQ_NOIDLE, /* don't anticipate more IO after this one */
+ __REQ_NOMERGE, /* don't touch this for merging */
+ __REQ_IDLE, /* anticipate more IO after this one */
__REQ_INTEGRITY, /* I/O includes block integrity payload */
__REQ_FUA, /* forced unit access */
__REQ_PREFLUSH, /* request for cache flush */
-
- /* bio only flags */
__REQ_RAHEAD, /* read ahead, can fail anytime */
- __REQ_THROTTLED, /* This bio has already been subjected to
- * throttling rules. Don't do it again. */
-
- /* request only flags */
- __REQ_SORTED, /* elevator knows about this request */
- __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
- __REQ_NOMERGE, /* don't touch this for merging */
- __REQ_STARTED, /* drive already may have started this one */
- __REQ_DONTPREP, /* don't call prep for this one */
- __REQ_QUEUED, /* uses queueing */
- __REQ_ELVPRIV, /* elevator private data attached */
- __REQ_FAILED, /* set if the request failed */
- __REQ_QUIET, /* don't worry about errors */
- __REQ_PREEMPT, /* set for "ide_preempt" requests and also
- for requests for which the SCSI "quiesce"
- state must be ignored. */
- __REQ_ALLOCED, /* request came from our alloc pool */
- __REQ_COPY_USER, /* contains copies of user pages */
- __REQ_FLUSH_SEQ, /* request for flush sequence */
- __REQ_IO_STAT, /* account I/O stat */
- __REQ_MIXED_MERGE, /* merge of different types, fail separately */
- __REQ_PM, /* runtime pm request */
- __REQ_HASHED, /* on IO scheduler merge hash */
- __REQ_MQ_INFLIGHT, /* track inflight for MQ */
+ __REQ_BACKGROUND, /* background IO */
__REQ_NR_BITS, /* stops here */
};
@@ -198,54 +189,47 @@ enum rq_flag_bits {
#define REQ_SYNC (1ULL << __REQ_SYNC)
#define REQ_META (1ULL << __REQ_META)
#define REQ_PRIO (1ULL << __REQ_PRIO)
-#define REQ_NOIDLE (1ULL << __REQ_NOIDLE)
+#define REQ_NOMERGE (1ULL << __REQ_NOMERGE)
+#define REQ_IDLE (1ULL << __REQ_IDLE)
#define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY)
+#define REQ_FUA (1ULL << __REQ_FUA)
+#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH)
+#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
+#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
-#define REQ_COMMON_MASK \
- (REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \
- REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE)
-#define REQ_CLONE_MASK REQ_COMMON_MASK
-/* This mask is used for both bio and request merge checking */
#define REQ_NOMERGE_FLAGS \
- (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_PREFLUSH | REQ_FUA | REQ_FLUSH_SEQ)
+ (REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)
-#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
-#define REQ_THROTTLED (1ULL << __REQ_THROTTLED)
+#define bio_op(bio) \
+ ((bio)->bi_opf & REQ_OP_MASK)
+#define req_op(req) \
+ ((req)->cmd_flags & REQ_OP_MASK)
-#define REQ_SORTED (1ULL << __REQ_SORTED)
-#define REQ_SOFTBARRIER (1ULL << __REQ_SOFTBARRIER)
-#define REQ_FUA (1ULL << __REQ_FUA)
-#define REQ_NOMERGE (1ULL << __REQ_NOMERGE)
-#define REQ_STARTED (1ULL << __REQ_STARTED)
-#define REQ_DONTPREP (1ULL << __REQ_DONTPREP)
-#define REQ_QUEUED (1ULL << __REQ_QUEUED)
-#define REQ_ELVPRIV (1ULL << __REQ_ELVPRIV)
-#define REQ_FAILED (1ULL << __REQ_FAILED)
-#define REQ_QUIET (1ULL << __REQ_QUIET)
-#define REQ_PREEMPT (1ULL << __REQ_PREEMPT)
-#define REQ_ALLOCED (1ULL << __REQ_ALLOCED)
-#define REQ_COPY_USER (1ULL << __REQ_COPY_USER)
-#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH)
-#define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ)
-#define REQ_IO_STAT (1ULL << __REQ_IO_STAT)
-#define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE)
-#define REQ_PM (1ULL << __REQ_PM)
-#define REQ_HASHED (1ULL << __REQ_HASHED)
-#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT)
-
-enum req_op {
- REQ_OP_READ,
- REQ_OP_WRITE,
- REQ_OP_DISCARD, /* request to discard sectors */
- REQ_OP_SECURE_ERASE, /* request to securely erase sectors */
- REQ_OP_WRITE_SAME, /* write same block many times */
- REQ_OP_FLUSH, /* request for cache flush */
-};
+/* obsolete, don't use in new code */
+static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
+ unsigned op_flags)
+{
+ bio->bi_opf = op | op_flags;
+}
-#define REQ_OP_BITS 3
+static inline bool op_is_write(unsigned int op)
+{
+ return (op & 1);
+}
+
+/*
+ * Reads are always treated as synchronous, as are requests with the FUA or
+ * PREFLUSH flag. Other operations may be marked as synchronous using the
+ * REQ_SYNC flag.
+ */
+static inline bool op_is_sync(unsigned int op)
+{
+ return (op & REQ_OP_MASK) == REQ_OP_READ ||
+ (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
+}
typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE -1U
@@ -271,4 +255,20 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
}
+struct blk_issue_stat {
+ u64 time;
+};
+
+#define BLK_RQ_STAT_BATCH 64
+
+struct blk_rq_stat {
+ s64 mean;
+ u64 min;
+ u64 max;
+ s32 nr_samples;
+ s32 nr_batch;
+ u64 batch;
+ s64 time;
+};
+
#endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c47c358ba052..c5393766909d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -24,6 +24,7 @@
#include <linux/rcupdate.h>
#include <linux/percpu-refcount.h>
#include <linux/scatterlist.h>
+#include <linux/blkzoned.h>
struct module;
struct scsi_ioctl_command;
@@ -37,6 +38,7 @@ struct bsg_job;
struct blkcg_gq;
struct blk_flush_queue;
struct pr_ops;
+struct rq_wb;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
@@ -77,6 +79,55 @@ enum rq_cmd_type_bits {
REQ_TYPE_DRV_PRIV, /* driver defined types from here */
};
+/*
+ * request flags */
+typedef __u32 __bitwise req_flags_t;
+
+/* elevator knows about this request */
+#define RQF_SORTED ((__force req_flags_t)(1 << 0))
+/* drive already may have started this one */
+#define RQF_STARTED ((__force req_flags_t)(1 << 1))
+/* uses tagged queueing */
+#define RQF_QUEUED ((__force req_flags_t)(1 << 2))
+/* may not be passed by ioscheduler */
+#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3))
+/* request for flush sequence */
+#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4))
+/* merge of different types, fail separately */
+#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5))
+/* track inflight for MQ */
+#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6))
+/* don't call prep for this one */
+#define RQF_DONTPREP ((__force req_flags_t)(1 << 7))
+/* set for "ide_preempt" requests and also for requests for which the SCSI
+ "quiesce" state must be ignored. */
+#define RQF_PREEMPT ((__force req_flags_t)(1 << 8))
+/* contains copies of user pages */
+#define RQF_COPY_USER ((__force req_flags_t)(1 << 9))
+/* vaguely specified driver internal error. Ignored by the block layer */
+#define RQF_FAILED ((__force req_flags_t)(1 << 10))
+/* don't warn about errors */
+#define RQF_QUIET ((__force req_flags_t)(1 << 11))
+/* elevator private data attached */
+#define RQF_ELVPRIV ((__force req_flags_t)(1 << 12))
+/* account I/O stat */
+#define RQF_IO_STAT ((__force req_flags_t)(1 << 13))
+/* request came from our alloc pool */
+#define RQF_ALLOCED ((__force req_flags_t)(1 << 14))
+/* runtime pm request */
+#define RQF_PM ((__force req_flags_t)(1 << 15))
+/* on IO scheduler merge hash */
+#define RQF_HASHED ((__force req_flags_t)(1 << 16))
+/* IO stats tracking on */
+#define RQF_STATS ((__force req_flags_t)(1 << 17))
+/* Look at ->special_vec for the actual data payload instead of the
+ bio chain. */
+#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
+
+/* flags that prevent us from merging requests: */
+#define RQF_NOMERGE_FLAGS \
+ (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
+
#define BLK_MAX_CDB 16
/*
@@ -97,7 +148,8 @@ struct request {
int cpu;
unsigned cmd_type;
- u64 cmd_flags;
+ unsigned int cmd_flags; /* op and common flags */
+ req_flags_t rq_flags;
unsigned long atomic_flags;
/* the following two fields are internal, NEVER access directly */
@@ -126,6 +178,7 @@ struct request {
*/
union {
struct rb_node rb_node; /* sort/lookup */
+ struct bio_vec special_vec;
void *completion_data;
};
@@ -151,6 +204,7 @@ struct request {
struct gendisk *rq_disk;
struct hd_struct *part;
unsigned long start_time;
+ struct blk_issue_stat issue_stat;
#ifdef CONFIG_BLK_CGROUP
struct request_list *rl; /* rl this rq is alloced from */
unsigned long long start_time_ns;
@@ -198,20 +252,6 @@ struct request {
struct request *next_rq;
};
-#define REQ_OP_SHIFT (8 * sizeof(u64) - REQ_OP_BITS)
-#define req_op(req) ((req)->cmd_flags >> REQ_OP_SHIFT)
-
-#define req_set_op(req, op) do { \
- WARN_ON(op >= (1 << REQ_OP_BITS)); \
- (req)->cmd_flags &= ((1ULL << REQ_OP_SHIFT) - 1); \
- (req)->cmd_flags |= ((u64) (op) << REQ_OP_SHIFT); \
-} while (0)
-
-#define req_set_op_attrs(req, op, flags) do { \
- req_set_op(req, op); \
- (req)->cmd_flags |= flags; \
-} while (0)
-
static inline unsigned short req_get_ioprio(struct request *req)
{
return req->ioprio;
@@ -261,6 +301,15 @@ struct blk_queue_tag {
#define BLK_SCSI_MAX_CMDS (256)
#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
+/*
+ * Zoned block device models (zoned limit).
+ */
+enum blk_zoned_model {
+ BLK_ZONED_NONE, /* Regular block device */
+ BLK_ZONED_HA, /* Host-aware zoned block device */
+ BLK_ZONED_HM, /* Host-managed zoned block device */
+};
+
struct queue_limits {
unsigned long bounce_pfn;
unsigned long seg_boundary_mask;
@@ -278,6 +327,7 @@ struct queue_limits {
unsigned int max_discard_sectors;
unsigned int max_hw_discard_sectors;
unsigned int max_write_same_sectors;
+ unsigned int max_write_zeroes_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
@@ -290,8 +340,45 @@ struct queue_limits {
unsigned char cluster;
unsigned char discard_zeroes_data;
unsigned char raid_partial_stripes_expensive;
+ enum blk_zoned_model zoned;
};
+#ifdef CONFIG_BLK_DEV_ZONED
+
+struct blk_zone_report_hdr {
+ unsigned int nr_zones;
+ u8 padding[60];
+};
+
+extern int blkdev_report_zones(struct block_device *bdev,
+ sector_t sector, struct blk_zone *zones,
+ unsigned int *nr_zones, gfp_t gfp_mask);
+extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
+ sector_t nr_sectors, gfp_t gfp_mask);
+
+extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
+extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
+
+#else /* CONFIG_BLK_DEV_ZONED */
+
+static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
+ fmode_t mode, unsigned int cmd,
+ unsigned long arg)
+{
+ return -ENOTTY;
+}
+
+static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
+ fmode_t mode, unsigned int cmd,
+ unsigned long arg)
+{
+ return -ENOTTY;
+}
+
+#endif /* CONFIG_BLK_DEV_ZONED */
+
struct request_queue {
/*
* Together with queue_head for cacheline sharing
@@ -302,6 +389,8 @@ struct request_queue {
int nr_rqs[2]; /* # allocated [a]sync rqs */
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
+ struct rq_wb *rq_wb;
+
/*
* If blkcg is not used, @q->root_rl serves all requests. If blkcg
* is used, root blkg allocates from @q->root_rl and all other
@@ -327,6 +416,8 @@ struct request_queue {
struct blk_mq_ctx __percpu *queue_ctx;
unsigned int nr_queues;
+ unsigned int queue_depth;
+
/* hw dispatch queues */
struct blk_mq_hw_ctx **queue_hw_ctx;
unsigned int nr_hw_queues;
@@ -412,6 +503,9 @@ struct request_queue {
unsigned int nr_sorted;
unsigned int in_flight[2];
+
+ struct blk_rq_stat rq_stats[2];
+
/*
* Number of active block driver functions for which blk_drain_queue()
* must wait. Must be incremented around functions that unlock the
@@ -420,6 +514,7 @@ struct request_queue {
unsigned int request_fn_active;
unsigned int rq_timeout;
+ int poll_nsec;
struct timer_list timeout;
struct work_struct timeout_work;
struct list_head timeout_list;
@@ -505,6 +600,7 @@ struct request_queue {
#define QUEUE_FLAG_FUA 24 /* device supports FUA writes */
#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */
#define QUEUE_FLAG_DAX 26 /* device supports DAX */
+#define QUEUE_FLAG_STATS 27 /* track rq completion times */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
@@ -601,7 +697,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
REQ_FAILFAST_DRIVER))
#define blk_account_rq(rq) \
- (((rq)->cmd_flags & REQ_STARTED) && \
+ (((rq)->rq_flags & RQF_STARTED) && \
((rq)->cmd_type == REQ_TYPE_FS))
#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1)
@@ -627,17 +723,31 @@ static inline unsigned int blk_queue_cluster(struct request_queue *q)
return q->limits.cluster;
}
-/*
- * We regard a request as sync, if either a read or a sync write
- */
-static inline bool rw_is_sync(int op, unsigned int rw_flags)
+static inline enum blk_zoned_model
+blk_queue_zoned_model(struct request_queue *q)
{
- return op == REQ_OP_READ || (rw_flags & REQ_SYNC);
+ return q->limits.zoned;
+}
+
+static inline bool blk_queue_is_zoned(struct request_queue *q)
+{
+ switch (blk_queue_zoned_model(q)) {
+ case BLK_ZONED_HA:
+ case BLK_ZONED_HM:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline unsigned int blk_queue_zone_size(struct request_queue *q)
+{
+ return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
}
static inline bool rq_is_sync(struct request *rq)
{
- return rw_is_sync(req_op(rq), rq->cmd_flags);
+ return op_is_sync(rq->cmd_flags);
}
static inline bool blk_rl_full(struct request_list *rl, bool sync)
@@ -669,8 +779,13 @@ static inline bool rq_mergeable(struct request *rq)
if (req_op(rq) == REQ_OP_FLUSH)
return false;
+ if (req_op(rq) == REQ_OP_WRITE_ZEROES)
+ return false;
+
if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
return false;
+ if (rq->rq_flags & RQF_NOMERGE_FLAGS)
+ return false;
return true;
}
@@ -683,6 +798,14 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
return false;
}
+static inline unsigned int blk_queue_depth(struct request_queue *q)
+{
+ if (q->queue_depth)
+ return q->queue_depth;
+
+ return q->nr_requests;
+}
+
/*
* q->prep_rq_fn return values
*/
@@ -790,8 +913,6 @@ extern void __blk_put_request(struct request_queue *, struct request *);
extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
extern void blk_rq_set_block_pc(struct request *);
extern void blk_requeue_request(struct request_queue *, struct request *);
-extern void blk_add_request_payload(struct request *rq, struct page *page,
- int offset, unsigned int len);
extern int blk_lld_busy(struct request_queue *q);
extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
struct bio_set *bs, gfp_t gfp_mask,
@@ -824,6 +945,7 @@ extern void __blk_run_queue(struct request_queue *q);
extern void __blk_run_queue_uncond(struct request_queue *q);
extern void blk_run_queue(struct request_queue *);
extern void blk_run_queue_async(struct request_queue *q);
+extern void blk_mq_quiesce_queue(struct request_queue *q);
extern int blk_rq_map_user(struct request_queue *, struct request *,
struct rq_map_data *, void __user *, unsigned long,
gfp_t);
@@ -837,7 +959,7 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *,
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
struct request *, int, rq_end_io_fn *);
-bool blk_poll(struct request_queue *q, blk_qc_t cookie);
+bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
{
@@ -888,6 +1010,9 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
if (unlikely(op == REQ_OP_WRITE_SAME))
return q->limits.max_write_same_sectors;
+ if (unlikely(op == REQ_OP_WRITE_ZEROES))
+ return q->limits.max_write_zeroes_sectors;
+
return q->limits.max_sectors;
}
@@ -991,6 +1116,8 @@ extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors);
extern void blk_queue_max_write_same_sectors(struct request_queue *q,
unsigned int max_write_same_sectors);
+extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
+ unsigned int max_write_same_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
extern void blk_queue_alignment_offset(struct request_queue *q,
@@ -999,6 +1126,7 @@ extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
+extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
extern void blk_set_default_limits(struct queue_limits *lim);
extern void blk_set_stacking_limits(struct queue_limits *lim);
extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
@@ -1027,6 +1155,13 @@ extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
+static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
+{
+ if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+ return 1;
+ return rq->nr_phys_segments;
+}
+
extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
extern void blk_dump_rq_flags(struct request *, char *);
extern long nr_blockdev_pages(void);
@@ -1057,7 +1192,7 @@ static inline int blk_pre_runtime_suspend(struct request_queue *q)
static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {}
static inline void blk_pre_runtime_resume(struct request_queue *q) {}
static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
-extern inline void blk_set_runtime_active(struct request_queue *q) {}
+static inline void blk_set_runtime_active(struct request_queue *q) {}
#endif
/*
@@ -1078,6 +1213,7 @@ struct blk_plug {
struct list_head cb_list; /* md requires an unplug callback */
};
#define BLK_MAX_REQUEST_COUNT 16
+#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
struct blk_plug_cb;
typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
@@ -1151,6 +1287,9 @@ extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
struct bio **biop);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
+extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
+ bool discard);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, bool discard);
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
@@ -1354,6 +1493,46 @@ static inline unsigned int bdev_write_same(struct block_device *bdev)
return 0;
}
+static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (q)
+ return q->limits.max_write_zeroes_sectors;
+
+ return 0;
+}
+
+static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (q)
+ return blk_queue_zoned_model(q);
+
+ return BLK_ZONED_NONE;
+}
+
+static inline bool bdev_is_zoned(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (q)
+ return blk_queue_is_zoned(q);
+
+ return false;
+}
+
+static inline unsigned int bdev_zone_size(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (q)
+ return blk_queue_zone_size(q);
+
+ return 0;
+}
+
static inline int queue_dma_alignment(struct request_queue *q)
{
return q ? q->dma_alignment : 511;
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index cceb72f9e29f..e417f080219a 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -118,7 +118,7 @@ static inline int blk_cmd_buf_len(struct request *rq)
}
extern void blk_dump_cmd(char *buf, struct request *rq);
-extern void blk_fill_rwbs(char *rwbs, int op, u32 rw, int bytes);
+extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes);
#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 8dbd7879fdc6..67bcef2ecddb 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -1,7 +1,7 @@
#ifndef __FS_CEPH_MESSENGER_H
#define __FS_CEPH_MESSENGER_H
-#include <linux/blk_types.h>
+#include <linux/bvec.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/net.h>
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index b91b023deffb..a52c6580cc9a 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -58,7 +58,7 @@ struct dm_io_notify {
struct dm_io_client;
struct dm_io_request {
int bi_op; /* REQ_OP */
- int bi_op_flags; /* rq_flag_bits */
+ int bi_op_flags; /* req_flag_bits */
struct dm_io_memory mem; /* Memory to use for io */
struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */
struct dm_io_client *client; /* Client memory handler */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index e7f358d2e5fc..b276e9ef0e0b 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -30,7 +30,7 @@ typedef int (elevator_dispatch_fn) (struct request_queue *, int);
typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
-typedef int (elevator_may_queue_fn) (struct request_queue *, int, int);
+typedef int (elevator_may_queue_fn) (struct request_queue *, unsigned int);
typedef void (elevator_init_icq_fn) (struct io_cq *);
typedef void (elevator_exit_icq_fn) (struct io_cq *);
@@ -108,6 +108,11 @@ struct elevator_type
#define ELV_HASH_BITS 6
+void elv_rqhash_del(struct request_queue *q, struct request *rq);
+void elv_rqhash_add(struct request_queue *q, struct request *rq);
+void elv_rqhash_reposition(struct request_queue *q, struct request *rq);
+struct request *elv_rqhash_find(struct request_queue *q, sector_t offset);
+
/*
* each queue has an elevator_queue associated with it
*/
@@ -139,7 +144,7 @@ extern struct request *elv_former_request(struct request_queue *, struct request
extern struct request *elv_latter_request(struct request_queue *, struct request *);
extern int elv_register_queue(struct request_queue *q);
extern void elv_unregister_queue(struct request_queue *q);
-extern int elv_may_queue(struct request_queue *, int, int);
+extern int elv_may_queue(struct request_queue *, unsigned int);
extern void elv_completed_request(struct request_queue *, struct request *);
extern int elv_set_request(struct request_queue *q, struct request *rq,
struct bio *bio, gfp_t gfp_mask);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dc0478c07b2a..b84230e070be 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -28,7 +28,6 @@
#include <linux/uidgid.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
-#include <linux/blk_types.h>
#include <linux/workqueue.h>
#include <linux/percpu-rwsem.h>
#include <linux/delayed_call.h>
@@ -38,6 +37,7 @@
struct backing_dev_info;
struct bdi_writeback;
+struct bio;
struct export_operations;
struct hd_geometry;
struct iovec;
@@ -152,58 +152,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define CHECK_IOVEC_ONLY -1
/*
- * The below are the various read and write flags that we support. Some of
- * them include behavioral modifiers that send information down to the
- * block layer and IO scheduler. They should be used along with a req_op.
- * Terminology:
- *
- * The block layer uses device plugging to defer IO a little bit, in
- * the hope that we will see more IO very shortly. This increases
- * coalescing of adjacent IO and thus reduces the number of IOs we
- * have to send to the device. It also allows for better queuing,
- * if the IO isn't mergeable. If the caller is going to be waiting
- * for the IO, then he must ensure that the device is unplugged so
- * that the IO is dispatched to the driver.
- *
- * All IO is handled async in Linux. This is fine for background
- * writes, but for reads or writes that someone waits for completion
- * on, we want to notify the block layer and IO scheduler so that they
- * know about it. That allows them to make better scheduling
- * decisions. So when the below references 'sync' and 'async', it
- * is referencing this priority hint.
- *
- * With that in mind, the available types are:
- *
- * READ A normal read operation. Device will be plugged.
- * READ_SYNC A synchronous read. Device is not plugged, caller can
- * immediately wait on this read without caring about
- * unplugging.
- * WRITE A normal async write. Device will be plugged.
- * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down
- * the hint that someone will be waiting on this IO
- * shortly. The write equivalent of READ_SYNC.
- * WRITE_ODIRECT Special case write for O_DIRECT only.
- * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush.
- * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on
- * non-volatile media on completion.
- * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded
- * by a cache flush and data is guaranteed to be on
- * non-volatile media on completion.
- *
- */
-#define RW_MASK REQ_OP_WRITE
-
-#define READ REQ_OP_READ
-#define WRITE REQ_OP_WRITE
-
-#define READ_SYNC REQ_SYNC
-#define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE)
-#define WRITE_ODIRECT REQ_SYNC
-#define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH)
-#define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA)
-#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA)
-
-/*
* Attribute flags. These should be or-ed together to figure out what
* has been changed!
*/
@@ -2499,19 +2447,6 @@ extern void make_bad_inode(struct inode *);
extern bool is_bad_inode(struct inode *);
#ifdef CONFIG_BLOCK
-static inline bool op_is_write(unsigned int op)
-{
- return op == REQ_OP_READ ? false : true;
-}
-
-/*
- * return data direction, READ or WRITE
- */
-static inline int bio_data_dir(struct bio *bio)
-{
- return op_is_write(bio_op(bio)) ? WRITE : READ;
-}
-
extern void check_disk_size_change(struct gendisk *disk,
struct block_device *bdev);
extern int revalidate_disk(struct gendisk *);
@@ -2782,7 +2717,6 @@ static inline void remove_inode_hash(struct inode *inode)
extern void inode_sb_list_add(struct inode *inode);
#ifdef CONFIG_BLOCK
-extern blk_qc_t submit_bio(struct bio *);
extern int bdev_read_only(struct block_device *);
#endif
extern int set_blocksize(struct block_device *, int);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index bc6ed52a39b9..01b6b460c34d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -50,6 +50,10 @@
#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a)))
#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
+/* generic data direction definitions */
+#define READ 0
+#define WRITE 1
+
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
#define u64_to_user_ptr(x) ( \
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index d190786e4ad8..7c273bbc5351 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -47,6 +47,7 @@ struct ppa_addr {
struct nvm_rq;
struct nvm_id;
struct nvm_dev;
+struct nvm_tgt_dev;
typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
@@ -107,6 +108,8 @@ enum {
NVM_RSP_NOT_CHANGEABLE = 0x1,
NVM_RSP_ERR_FAILWRITE = 0x40ff,
NVM_RSP_ERR_EMPTYPAGE = 0x42ff,
+ NVM_RSP_ERR_FAILECC = 0x4281,
+ NVM_RSP_WARN_HIGHECC = 0x4700,
/* Device opcodes */
NVM_OP_HBREAD = 0x02,
@@ -208,7 +211,7 @@ struct nvm_id {
struct nvm_target {
struct list_head list;
- struct nvm_dev *dev;
+ struct nvm_tgt_dev *dev;
struct nvm_tgt_type *type;
struct gendisk *disk;
};
@@ -228,7 +231,7 @@ typedef void (nvm_end_io_fn)(struct nvm_rq *);
struct nvm_rq {
struct nvm_tgt_instance *ins;
- struct nvm_dev *dev;
+ struct nvm_tgt_dev *dev;
struct bio *bio;
@@ -263,35 +266,12 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
return rqdata + 1;
}
-struct nvm_block;
-
-struct nvm_lun {
- int id;
-
- int lun_id;
- int chnl_id;
-
- spinlock_t lock;
-
- unsigned int nr_free_blocks; /* Number of unused blocks */
- struct nvm_block *blocks;
-};
-
enum {
NVM_BLK_ST_FREE = 0x1, /* Free block */
NVM_BLK_ST_TGT = 0x2, /* Block in use by target */
NVM_BLK_ST_BAD = 0x8, /* Bad block */
};
-struct nvm_block {
- struct list_head list;
- struct nvm_lun *lun;
- unsigned long id;
-
- void *priv;
- int state;
-};
-
/* system block cpu representation */
struct nvm_sb_info {
unsigned long seqnr;
@@ -301,22 +281,12 @@ struct nvm_sb_info {
struct ppa_addr fs_ppa;
};
-struct nvm_dev {
- struct nvm_dev_ops *ops;
-
- struct list_head devices;
-
- /* Media manager */
- struct nvmm_type *mt;
- void *mp;
-
- /* System blocks */
- struct nvm_sb_info sb;
-
- /* Device information */
+/* Device generic information */
+struct nvm_geo {
int nr_chnls;
+ int nr_luns;
+ int luns_per_chnl; /* -1 if channels are not symmetric */
int nr_planes;
- int luns_per_chnl;
int sec_per_pg; /* only sectors for a single page */
int pgs_per_blk;
int blks_per_lun;
@@ -336,14 +306,44 @@ struct nvm_dev {
int sec_per_pl; /* all sectors across planes */
int sec_per_blk;
int sec_per_lun;
+};
+
+struct nvm_tgt_dev {
+ /* Device information */
+ struct nvm_geo geo;
+
+ /* Base ppas for target LUNs */
+ struct ppa_addr *luns;
+
+ sector_t total_secs;
+
+ struct nvm_id identity;
+ struct request_queue *q;
+
+ struct nvm_dev *parent;
+ void *map;
+};
+
+struct nvm_dev {
+ struct nvm_dev_ops *ops;
+
+ struct list_head devices;
+
+ /* Media manager */
+ struct nvmm_type *mt;
+ void *mp;
+
+ /* System blocks */
+ struct nvm_sb_info sb;
+
+ /* Device information */
+ struct nvm_geo geo;
/* lower page table */
int lps_per_blk;
int *lptbl;
- unsigned long total_blocks;
unsigned long total_secs;
- int nr_luns;
unsigned long *lun_map;
void *dma_pool;
@@ -352,26 +352,57 @@ struct nvm_dev {
/* Backend device */
struct request_queue *q;
- struct device dev;
- struct device *parent_dev;
char name[DISK_NAME_LEN];
void *private_data;
+ void *rmap;
+
struct mutex mlock;
spinlock_t lock;
};
+static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo,
+ u64 pba)
+{
+ struct ppa_addr l;
+ int secs, pgs, blks, luns;
+ sector_t ppa = pba;
+
+ l.ppa = 0;
+
+ div_u64_rem(ppa, geo->sec_per_pg, &secs);
+ l.g.sec = secs;
+
+ sector_div(ppa, geo->sec_per_pg);
+ div_u64_rem(ppa, geo->pgs_per_blk, &pgs);
+ l.g.pg = pgs;
+
+ sector_div(ppa, geo->pgs_per_blk);
+ div_u64_rem(ppa, geo->blks_per_lun, &blks);
+ l.g.blk = blks;
+
+ sector_div(ppa, geo->blks_per_lun);
+ div_u64_rem(ppa, geo->luns_per_chnl, &luns);
+ l.g.lun = luns;
+
+ sector_div(ppa, geo->luns_per_chnl);
+ l.g.ch = ppa;
+
+ return l;
+}
+
static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
struct ppa_addr r)
{
+ struct nvm_geo *geo = &dev->geo;
struct ppa_addr l;
- l.ppa = ((u64)r.g.blk) << dev->ppaf.blk_offset;
- l.ppa |= ((u64)r.g.pg) << dev->ppaf.pg_offset;
- l.ppa |= ((u64)r.g.sec) << dev->ppaf.sect_offset;
- l.ppa |= ((u64)r.g.pl) << dev->ppaf.pln_offset;
- l.ppa |= ((u64)r.g.lun) << dev->ppaf.lun_offset;
- l.ppa |= ((u64)r.g.ch) << dev->ppaf.ch_offset;
+ l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset;
+ l.ppa |= ((u64)r.g.pg) << geo->ppaf.pg_offset;
+ l.ppa |= ((u64)r.g.sec) << geo->ppaf.sect_offset;
+ l.ppa |= ((u64)r.g.pl) << geo->ppaf.pln_offset;
+ l.ppa |= ((u64)r.g.lun) << geo->ppaf.lun_offset;
+ l.ppa |= ((u64)r.g.ch) << geo->ppaf.ch_offset;
return l;
}
@@ -379,24 +410,25 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
struct ppa_addr r)
{
+ struct nvm_geo *geo = &dev->geo;
struct ppa_addr l;
l.ppa = 0;
/*
* (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc.
*/
- l.g.blk = (r.ppa >> dev->ppaf.blk_offset) &
- (((1 << dev->ppaf.blk_len) - 1));
- l.g.pg |= (r.ppa >> dev->ppaf.pg_offset) &
- (((1 << dev->ppaf.pg_len) - 1));
- l.g.sec |= (r.ppa >> dev->ppaf.sect_offset) &
- (((1 << dev->ppaf.sect_len) - 1));
- l.g.pl |= (r.ppa >> dev->ppaf.pln_offset) &
- (((1 << dev->ppaf.pln_len) - 1));
- l.g.lun |= (r.ppa >> dev->ppaf.lun_offset) &
- (((1 << dev->ppaf.lun_len) - 1));
- l.g.ch |= (r.ppa >> dev->ppaf.ch_offset) &
- (((1 << dev->ppaf.ch_len) - 1));
+ l.g.blk = (r.ppa >> geo->ppaf.blk_offset) &
+ (((1 << geo->ppaf.blk_len) - 1));
+ l.g.pg |= (r.ppa >> geo->ppaf.pg_offset) &
+ (((1 << geo->ppaf.pg_len) - 1));
+ l.g.sec |= (r.ppa >> geo->ppaf.sect_offset) &
+ (((1 << geo->ppaf.sect_len) - 1));
+ l.g.pl |= (r.ppa >> geo->ppaf.pln_offset) &
+ (((1 << geo->ppaf.pln_len) - 1));
+ l.g.lun |= (r.ppa >> geo->ppaf.lun_offset) &
+ (((1 << geo->ppaf.lun_len) - 1));
+ l.g.ch |= (r.ppa >> geo->ppaf.ch_offset) &
+ (((1 << geo->ppaf.ch_len) - 1));
return l;
}
@@ -411,18 +443,13 @@ static inline void ppa_set_empty(struct ppa_addr *ppa_addr)
ppa_addr->ppa = ADDR_EMPTY;
}
-static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev,
- struct nvm_block *blk)
+static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2)
{
- struct ppa_addr ppa;
- struct nvm_lun *lun = blk->lun;
-
- ppa.ppa = 0;
- ppa.g.blk = blk->id % dev->blks_per_lun;
- ppa.g.lun = lun->lun_id;
- ppa.g.ch = lun->chnl_id;
+ if (ppa_empty(ppa1) || ppa_empty(ppa2))
+ return 0;
- return ppa;
+ return ((ppa1.g.ch == ppa2.g.ch) && (ppa1.g.lun == ppa2.g.lun) &&
+ (ppa1.g.blk == ppa2.g.blk));
}
static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
@@ -432,7 +459,7 @@ static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
typedef sector_t (nvm_tgt_capacity_fn)(void *);
-typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int);
+typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *);
typedef void (nvm_tgt_exit_fn)(void *);
struct nvm_tgt_type {
@@ -465,23 +492,18 @@ typedef void (nvmm_unregister_fn)(struct nvm_dev *);
typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *);
typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *);
-typedef struct nvm_block *(nvmm_get_blk_fn)(struct nvm_dev *,
- struct nvm_lun *, unsigned long);
-typedef void (nvmm_put_blk_fn)(struct nvm_dev *, struct nvm_block *);
-typedef int (nvmm_open_blk_fn)(struct nvm_dev *, struct nvm_block *);
-typedef int (nvmm_close_blk_fn)(struct nvm_dev *, struct nvm_block *);
-typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *);
-typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
- unsigned long);
-typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int);
-typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
-typedef int (nvmm_reserve_lun)(struct nvm_dev *, int);
-typedef void (nvmm_release_lun)(struct nvm_dev *, int);
-typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
-
+typedef int (nvmm_submit_io_fn)(struct nvm_tgt_dev *, struct nvm_rq *);
+typedef int (nvmm_erase_blk_fn)(struct nvm_tgt_dev *, struct ppa_addr *, int);
typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
+typedef struct ppa_addr (nvmm_trans_ppa_fn)(struct nvm_tgt_dev *,
+ struct ppa_addr, int);
+typedef void (nvmm_part_to_tgt_fn)(struct nvm_dev *, sector_t*, int);
+
+enum {
+ TRANS_TGT_TO_DEV = 0x0,
+ TRANS_DEV_TO_TGT = 0x1,
+};
struct nvmm_type {
const char *name;
@@ -493,54 +515,41 @@ struct nvmm_type {
nvmm_create_tgt_fn *create_tgt;
nvmm_remove_tgt_fn *remove_tgt;
- /* Block administration callbacks */
- nvmm_get_blk_fn *get_blk;
- nvmm_put_blk_fn *put_blk;
- nvmm_open_blk_fn *open_blk;
- nvmm_close_blk_fn *close_blk;
- nvmm_flush_blk_fn *flush_blk;
-
nvmm_submit_io_fn *submit_io;
nvmm_erase_blk_fn *erase_blk;
- /* Bad block mgmt */
- nvmm_mark_blk_fn *mark_blk;
-
- /* Configuration management */
- nvmm_get_lun_fn *get_lun;
- nvmm_reserve_lun *reserve_lun;
- nvmm_release_lun *release_lun;
-
- /* Statistics */
- nvmm_lun_info_print_fn *lun_info_print;
-
nvmm_get_area_fn *get_area;
nvmm_put_area_fn *put_area;
+ nvmm_trans_ppa_fn *trans_ppa;
+ nvmm_part_to_tgt_fn *part_to_tgt;
+
struct list_head list;
};
extern int nvm_register_mgr(struct nvmm_type *);
extern void nvm_unregister_mgr(struct nvmm_type *);
-extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *,
- unsigned long);
-extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *);
-
extern struct nvm_dev *nvm_alloc_dev(int);
extern int nvm_register(struct nvm_dev *);
extern void nvm_unregister(struct nvm_dev *);
-void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type);
-
-extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *);
+extern int nvm_set_bb_tbl(struct nvm_dev *, struct ppa_addr *, int, int);
+extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
+ int, int);
+extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
+extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *);
extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *);
extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
const struct ppa_addr *, int, int);
extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
-extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int);
-extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *);
+extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int);
+extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int);
+extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
+ void *);
+extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
+extern void nvm_put_area(struct nvm_tgt_dev *, sector_t);
extern void nvm_end_io(struct nvm_rq *, int);
extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int,
void *, int);
@@ -548,6 +557,7 @@ extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int,
int, void *, int);
extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *);
+extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
/* sysblk.c */
#define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */
@@ -569,10 +579,10 @@ extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *);
extern int nvm_dev_factory(struct nvm_dev *, int flags);
-#define nvm_for_each_lun_ppa(dev, ppa, chid, lunid) \
- for ((chid) = 0, (ppa).ppa = 0; (chid) < (dev)->nr_chnls; \
+#define nvm_for_each_lun_ppa(geo, ppa, chid, lunid) \
+ for ((chid) = 0, (ppa).ppa = 0; (chid) < (geo)->nr_chnls; \
(chid)++, (ppa).g.ch = (chid)) \
- for ((lunid) = 0; (lunid) < (dev)->luns_per_chnl; \
+ for ((lunid) = 0; (lunid) < (geo)->luns_per_chnl; \
(lunid)++, (ppa).g.lun = (lunid))
#else /* CONFIG_NVM */
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
new file mode 100644
index 000000000000..f21471f7ee40
--- /dev/null
+++ b/include/linux/nvme-fc-driver.h
@@ -0,0 +1,851 @@
+/*
+ * Copyright (c) 2016, Avago Technologies
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVME_FC_DRIVER_H
+#define _NVME_FC_DRIVER_H 1
+
+
+/*
+ * ********************** LLDD FC-NVME Host API ********************
+ *
+ * For FC LLDD's that are the NVME Host role.
+ *
+ * ******************************************************************
+ */
+
+
+
+/* FC Port role bitmask - can merge with FC Port Roles in fc transport */
+#define FC_PORT_ROLE_NVME_INITIATOR 0x10
+#define FC_PORT_ROLE_NVME_TARGET 0x11
+#define FC_PORT_ROLE_NVME_DISCOVERY 0x12
+
+
+/**
+ * struct nvme_fc_port_info - port-specific ids and FC connection-specific
+ * data element used during NVME Host role
+ * registrations
+ *
+ * Static fields describing the port being registered:
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @port_role: What NVME roles are supported (see FC_PORT_ROLE_xxx)
+ *
+ * Initialization values for dynamic port fields:
+ * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must
+ * be set to 0.
+ */
+struct nvme_fc_port_info {
+ u64 node_name;
+ u64 port_name;
+ u32 port_role;
+ u32 port_id;
+};
+
+
+/**
+ * struct nvmefc_ls_req - Request structure passed from NVME-FC transport
+ * to LLDD in order to perform a NVME FC-4 LS
+ * request and obtain a response.
+ *
+ * Values set by the NVME-FC layer prior to calling the LLDD ls_req
+ * entrypoint.
+ * @rqstaddr: pointer to request buffer
+ * @rqstdma: PCI DMA address of request buffer
+ * @rqstlen: Length, in bytes, of request buffer
+ * @rspaddr: pointer to response buffer
+ * @rspdma: PCI DMA address of response buffer
+ * @rsplen: Length, in bytes, of response buffer
+ * @timeout: Maximum amount of time, in seconds, to wait for the LS response.
+ * If timeout exceeded, LLDD to abort LS exchange and complete
+ * LS request with error status.
+ * @private: pointer to memory allocated alongside the ls request structure
+ * that is specifically for the LLDD to use while processing the
+ * request. The length of the buffer corresponds to the
+ * lsrqst_priv_sz value specified in the nvme_fc_port_template
+ * supplied by the LLDD.
+ * @done: The callback routine the LLDD is to invoke upon completion of
+ * the LS request. req argument is the pointer to the original LS
+ * request structure. Status argument must be 0 upon success, a
+ * negative errno on failure (example: -ENXIO).
+ */
+struct nvmefc_ls_req {
+ void *rqstaddr;
+ dma_addr_t rqstdma;
+ u32 rqstlen;
+ void *rspaddr;
+ dma_addr_t rspdma;
+ u32 rsplen;
+ u32 timeout;
+
+ void *private;
+
+ void (*done)(struct nvmefc_ls_req *req, int status);
+
+} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
+
+
+enum nvmefc_fcp_datadir {
+ NVMEFC_FCP_NODATA, /* payload_length and sg_cnt will be zero */
+ NVMEFC_FCP_WRITE,
+ NVMEFC_FCP_READ,
+};
+
+
+#define NVME_FC_MAX_SEGMENTS 256
+
+/**
+ * struct nvmefc_fcp_req - Request structure passed from NVME-FC transport
+ * to LLDD in order to perform a NVME FCP IO operation.
+ *
+ * Values set by the NVME-FC layer prior to calling the LLDD fcp_io
+ * entrypoint.
+ * @cmdaddr: pointer to the FCP CMD IU buffer
+ * @rspaddr: pointer to the FCP RSP IU buffer
+ * @cmddma: PCI DMA address of the FCP CMD IU buffer
+ * @rspdma: PCI DMA address of the FCP RSP IU buffer
+ * @cmdlen: Length, in bytes, of the FCP CMD IU buffer
+ * @rsplen: Length, in bytes, of the FCP RSP IU buffer
+ * @payload_length: Length of DATA_IN or DATA_OUT payload data to transfer
+ * @sg_table: scatter/gather structure for payload data
+ * @first_sgl: memory for 1st scatter/gather list segment for payload data
+ * @sg_cnt: number of elements in the scatter/gather list
+ * @io_dir: direction of the FCP request (see NVMEFC_FCP_xxx)
+ * @sqid: The nvme SQID the command is being issued on
+ * @done: The callback routine the LLDD is to invoke upon completion of
+ * the FCP operation. req argument is the pointer to the original
+ * FCP IO operation.
+ * @private: pointer to memory allocated alongside the FCP operation
+ * request structure that is specifically for the LLDD to use
+ * while processing the operation. The length of the buffer
+ * corresponds to the fcprqst_priv_sz value specified in the
+ * nvme_fc_port_template supplied by the LLDD.
+ *
+ * Values set by the LLDD indicating completion status of the FCP operation.
+ * Must be set prior to calling the done() callback.
+ * @transferred_length: amount of payload data, in bytes, that were
+ * transferred. Should equal payload_length on success.
+ * @rcv_rsplen: length, in bytes, of the FCP RSP IU received.
+ * @status: Completion status of the FCP operation. must be 0 upon success,
+ * NVME_SC_FC_xxx value upon failure. Note: this is NOT a
+ * reflection of the NVME CQE completion status. Only the status
+ * of the FCP operation at the NVME-FC level.
+ */
+struct nvmefc_fcp_req {
+ void *cmdaddr;
+ void *rspaddr;
+ dma_addr_t cmddma;
+ dma_addr_t rspdma;
+ u16 cmdlen;
+ u16 rsplen;
+
+ u32 payload_length;
+ struct sg_table sg_table;
+ struct scatterlist *first_sgl;
+ int sg_cnt;
+ enum nvmefc_fcp_datadir io_dir;
+
+ __le16 sqid;
+
+ void (*done)(struct nvmefc_fcp_req *req);
+
+ void *private;
+
+ u32 transferred_length;
+ u16 rcv_rsplen;
+ u32 status;
+} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
+
+
+/*
+ * Direct copy of fc_port_state enum. For later merging
+ */
+enum nvme_fc_obj_state {
+ FC_OBJSTATE_UNKNOWN,
+ FC_OBJSTATE_NOTPRESENT,
+ FC_OBJSTATE_ONLINE,
+ FC_OBJSTATE_OFFLINE, /* User has taken Port Offline */
+ FC_OBJSTATE_BLOCKED,
+ FC_OBJSTATE_BYPASSED,
+ FC_OBJSTATE_DIAGNOSTICS,
+ FC_OBJSTATE_LINKDOWN,
+ FC_OBJSTATE_ERROR,
+ FC_OBJSTATE_LOOPBACK,
+ FC_OBJSTATE_DELETED,
+};
+
+
+/**
+ * struct nvme_fc_local_port - structure used between NVME-FC transport and
+ * a LLDD to reference a local NVME host port.
+ * Allocated/created by the nvme_fc_register_localport()
+ * transport interface.
+ *
+ * Fields with static values for the port. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_num: NVME-FC transport host port number
+ * @port_role: NVME roles are supported on the port (see FC_PORT_ROLE_xxx)
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @private: pointer to memory allocated alongside the local port
+ * structure that is specifically for the LLDD to use.
+ * The length of the buffer corresponds to the local_priv_sz
+ * value specified in the nvme_fc_port_template supplied by
+ * the LLDD.
+ *
+ * Fields with dynamic values. Values may change base on link state. LLDD
+ * may reference fields directly to change them. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must
+ * be set to 0.
+ * @port_state: Operational state of the port.
+ */
+struct nvme_fc_local_port {
+ /* static/read-only fields */
+ u32 port_num;
+ u32 port_role;
+ u64 node_name;
+ u64 port_name;
+
+ void *private;
+
+ /* dynamic fields */
+ u32 port_id;
+ enum nvme_fc_obj_state port_state;
+} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
+
+
+/**
+ * struct nvme_fc_remote_port - structure used between NVME-FC transport and
+ * a LLDD to reference a remote NVME subsystem port.
+ * Allocated/created by the nvme_fc_register_remoteport()
+ * transport interface.
+ *
+ * Fields with static values for the port. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_num: NVME-FC transport remote subsystem port number
+ * @port_role: NVME roles are supported on the port (see FC_PORT_ROLE_xxx)
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @localport: pointer to the NVME-FC local host port the subsystem is
+ * connected to.
+ * @private: pointer to memory allocated alongside the remote port
+ * structure that is specifically for the LLDD to use.
+ * The length of the buffer corresponds to the remote_priv_sz
+ * value specified in the nvme_fc_port_template supplied by
+ * the LLDD.
+ *
+ * Fields with dynamic values. Values may change base on link or login
+ * state. LLDD may reference fields directly to change them. Initialized by
+ * the port_info struct supplied to the registration call.
+ * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must
+ * be set to 0.
+ * @port_state: Operational state of the remote port. Valid values are
+ * ONLINE or UNKNOWN.
+ */
+struct nvme_fc_remote_port {
+ /* static fields */
+ u32 port_num;
+ u32 port_role;
+ u64 node_name;
+ u64 port_name;
+
+ struct nvme_fc_local_port *localport;
+
+ void *private;
+
+ /* dynamic fields */
+ u32 port_id;
+ enum nvme_fc_obj_state port_state;
+} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
+
+
+/**
+ * struct nvme_fc_port_template - structure containing static entrypoints and
+ * operational parameters for an LLDD that supports NVME host
+ * behavior. Passed by reference in port registrations.
+ * NVME-FC transport remembers template reference and may
+ * access it during runtime operation.
+ *
+ * Host/Initiator Transport Entrypoints/Parameters:
+ *
+ * @localport_delete: The LLDD initiates deletion of a localport via
+ * nvme_fc_deregister_localport(). However, the teardown is
+ * asynchronous. This routine is called upon the completion of the
+ * teardown to inform the LLDD that the localport has been deleted.
+ * Entrypoint is Mandatory.
+ *
+ * @remoteport_delete: The LLDD initiates deletion of a remoteport via
+ * nvme_fc_deregister_remoteport(). However, the teardown is
+ * asynchronous. This routine is called upon the completion of the
+ * teardown to inform the LLDD that the remoteport has been deleted.
+ * Entrypoint is Mandatory.
+ *
+ * @create_queue: Upon creating a host<->controller association, queues are
+ * created such that they can be affinitized to cpus/cores. This
+ * callback into the LLDD to notify that a controller queue is being
+ * created. The LLDD may choose to allocate an associated hw queue
+ * or map it onto a shared hw queue. Upon return from the call, the
+ * LLDD specifies a handle that will be given back to it for any
+ * command that is posted to the controller queue. The handle can
+ * be used by the LLDD to map quickly to the proper hw queue for
+ * command execution. The mask of cpu's that will map to this queue
+ * at the block-level is also passed in. The LLDD should use the
+ * queue id and/or cpu masks to ensure proper affinitization of the
+ * controller queue to the hw queue.
+ * Entrypoint is Optional.
+ *
+ * @delete_queue: This is the inverse of the crete_queue. During
+ * host<->controller association teardown, this routine is called
+ * when a controller queue is being terminated. Any association with
+ * a hw queue should be termined. If there is a unique hw queue, the
+ * hw queue should be torn down.
+ * Entrypoint is Optional.
+ *
+ * @poll_queue: Called to poll for the completion of an io on a blk queue.
+ * Entrypoint is Optional.
+ *
+ * @ls_req: Called to issue a FC-NVME FC-4 LS service request.
+ * The nvme_fc_ls_req structure will fully describe the buffers for
+ * the request payload and where to place the response payload. The
+ * LLDD is to allocate an exchange, issue the LS request, obtain the
+ * LS response, and call the "done" routine specified in the request
+ * structure (argument to done is the ls request structure itself).
+ * Entrypoint is Mandatory.
+ *
+ * @fcp_io: called to issue a FC-NVME I/O request. The I/O may be for
+ * an admin queue or an i/o queue. The nvmefc_fcp_req structure will
+ * fully describe the io: the buffer containing the FC-NVME CMD IU
+ * (which contains the SQE), the sg list for the payload if applicable,
+ * and the buffer to place the FC-NVME RSP IU into. The LLDD will
+ * complete the i/o, indicating the amount of data transferred or
+ * any transport error, and call the "done" routine specified in the
+ * request structure (argument to done is the fcp request structure
+ * itself).
+ * Entrypoint is Mandatory.
+ *
+ * @ls_abort: called to request the LLDD to abort the indicated ls request.
+ * The call may return before the abort has completed. After aborting
+ * the request, the LLDD must still call the ls request done routine
+ * indicating an FC transport Aborted status.
+ * Entrypoint is Mandatory.
+ *
+ * @fcp_abort: called to request the LLDD to abort the indicated fcp request.
+ * The call may return before the abort has completed. After aborting
+ * the request, the LLDD must still call the fcp request done routine
+ * indicating an FC transport Aborted status.
+ * Entrypoint is Mandatory.
+ *
+ * @max_hw_queues: indicates the maximum number of hw queues the LLDD
+ * supports for cpu affinitization.
+ * Value is Mandatory. Must be at least 1.
+ *
+ * @max_sgl_segments: indicates the maximum number of sgl segments supported
+ * by the LLDD
+ * Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @max_dif_sgl_segments: indicates the maximum number of sgl segments
+ * supported by the LLDD for DIF operations.
+ * Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @dma_boundary: indicates the dma address boundary where dma mappings
+ * will be split across.
+ * Value is Mandatory. Typical value is 0xFFFFFFFF to split across
+ * 4Gig address boundarys
+ *
+ * @local_priv_sz: The LLDD sets this field to the amount of additional
+ * memory that it would like fc nvme layer to allocate on the LLDD's
+ * behalf whenever a localport is allocated. The additional memory
+ * area solely for the of the LLDD and its location is specified by
+ * the localport->private pointer.
+ * Value is Mandatory. Allowed to be zero.
+ *
+ * @remote_priv_sz: The LLDD sets this field to the amount of additional
+ * memory that it would like fc nvme layer to allocate on the LLDD's
+ * behalf whenever a remoteport is allocated. The additional memory
+ * area solely for the of the LLDD and its location is specified by
+ * the remoteport->private pointer.
+ * Value is Mandatory. Allowed to be zero.
+ *
+ * @lsrqst_priv_sz: The LLDD sets this field to the amount of additional
+ * memory that it would like fc nvme layer to allocate on the LLDD's
+ * behalf whenever a ls request structure is allocated. The additional
+ * memory area solely for the of the LLDD and its location is
+ * specified by the ls_request->private pointer.
+ * Value is Mandatory. Allowed to be zero.
+ *
+ * @fcprqst_priv_sz: The LLDD sets this field to the amount of additional
+ * memory that it would like fc nvme layer to allocate on the LLDD's
+ * behalf whenever a fcp request structure is allocated. The additional
+ * memory area solely for the of the LLDD and its location is
+ * specified by the fcp_request->private pointer.
+ * Value is Mandatory. Allowed to be zero.
+ */
+struct nvme_fc_port_template {
+ /* initiator-based functions */
+ void (*localport_delete)(struct nvme_fc_local_port *);
+ void (*remoteport_delete)(struct nvme_fc_remote_port *);
+ int (*create_queue)(struct nvme_fc_local_port *,
+ unsigned int qidx, u16 qsize,
+ void **handle);
+ void (*delete_queue)(struct nvme_fc_local_port *,
+ unsigned int qidx, void *handle);
+ void (*poll_queue)(struct nvme_fc_local_port *, void *handle);
+ int (*ls_req)(struct nvme_fc_local_port *,
+ struct nvme_fc_remote_port *,
+ struct nvmefc_ls_req *);
+ int (*fcp_io)(struct nvme_fc_local_port *,
+ struct nvme_fc_remote_port *,
+ void *hw_queue_handle,
+ struct nvmefc_fcp_req *);
+ void (*ls_abort)(struct nvme_fc_local_port *,
+ struct nvme_fc_remote_port *,
+ struct nvmefc_ls_req *);
+ void (*fcp_abort)(struct nvme_fc_local_port *,
+ struct nvme_fc_remote_port *,
+ void *hw_queue_handle,
+ struct nvmefc_fcp_req *);
+
+ u32 max_hw_queues;
+ u16 max_sgl_segments;
+ u16 max_dif_sgl_segments;
+ u64 dma_boundary;
+
+ /* sizes of additional private data for data structures */
+ u32 local_priv_sz;
+ u32 remote_priv_sz;
+ u32 lsrqst_priv_sz;
+ u32 fcprqst_priv_sz;
+};
+
+
+/*
+ * Initiator/Host functions
+ */
+
+int nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
+ struct nvme_fc_port_template *template,
+ struct device *dev,
+ struct nvme_fc_local_port **lport_p);
+
+int nvme_fc_unregister_localport(struct nvme_fc_local_port *localport);
+
+int nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
+ struct nvme_fc_port_info *pinfo,
+ struct nvme_fc_remote_port **rport_p);
+
+int nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *remoteport);
+
+
+
+/*
+ * *************** LLDD FC-NVME Target/Subsystem API ***************
+ *
+ * For FC LLDD's that are the NVME Subsystem role
+ *
+ * ******************************************************************
+ */
+
+/**
+ * struct nvmet_fc_port_info - port-specific ids and FC connection-specific
+ * data element used during NVME Subsystem role
+ * registrations
+ *
+ * Static fields describing the port being registered:
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ *
+ * Initialization values for dynamic port fields:
+ * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must
+ * be set to 0.
+ */
+struct nvmet_fc_port_info {
+ u64 node_name;
+ u64 port_name;
+ u32 port_id;
+};
+
+
+/**
+ * struct nvmefc_tgt_ls_req - Structure used between LLDD and NVMET-FC
+ * layer to represent the exchange context for
+ * a FC-NVME Link Service (LS).
+ *
+ * The structure is allocated by the LLDD whenever a LS Request is received
+ * from the FC link. The address of the structure is passed to the nvmet-fc
+ * layer via the nvmet_fc_rcv_ls_req() call. The address of the structure
+ * will be passed back to the LLDD when the response is to be transmit.
+ * The LLDD is to use the address to map back to the LLDD exchange structure
+ * which maintains information such as the targetport the LS was received
+ * on, the remote FC NVME initiator that sent the LS, and any FC exchange
+ * context. Upon completion of the LS response transmit, the address of the
+ * structure will be passed back to the LS rsp done() routine, allowing the
+ * nvmet-fc layer to release dma resources. Upon completion of the done()
+ * routine, no further access will be made by the nvmet-fc layer and the
+ * LLDD can de-allocate the structure.
+ *
+ * Field initialization:
+ * At the time of the nvmet_fc_rcv_ls_req() call, there is no content that
+ * is valid in the structure.
+ *
+ * When the structure is used for the LLDD->xmt_ls_rsp() call, the nvmet-fc
+ * layer will fully set the fields in order to specify the response
+ * payload buffer and its length as well as the done routine to be called
+ * upon compeletion of the transmit. The nvmet-fc layer will also set a
+ * private pointer for its own use in the done routine.
+ *
+ * Values set by the NVMET-FC layer prior to calling the LLDD xmt_ls_rsp
+ * entrypoint.
+ * @rspbuf: pointer to the LS response buffer
+ * @rspdma: PCI DMA address of the LS response buffer
+ * @rsplen: Length, in bytes, of the LS response buffer
+ * @done: The callback routine the LLDD is to invoke upon completion of
+ * transmitting the LS response. req argument is the pointer to
+ * the original ls request.
+ * @nvmet_fc_private: pointer to an internal NVMET-FC layer structure used
+ * as part of the NVMET-FC processing. The LLDD is not to access
+ * this pointer.
+ */
+struct nvmefc_tgt_ls_req {
+ void *rspbuf;
+ dma_addr_t rspdma;
+ u16 rsplen;
+
+ void (*done)(struct nvmefc_tgt_ls_req *req);
+ void *nvmet_fc_private; /* LLDD is not to access !! */
+};
+
+/* Operations that NVME-FC layer may request the LLDD to perform for FCP */
+enum {
+ NVMET_FCOP_READDATA = 1, /* xmt data to initiator */
+ NVMET_FCOP_WRITEDATA = 2, /* xmt data from initiator */
+ NVMET_FCOP_READDATA_RSP = 3, /* xmt data to initiator and send
+ * rsp as well
+ */
+ NVMET_FCOP_RSP = 4, /* send rsp frame */
+ NVMET_FCOP_ABORT = 5, /* abort exchange via ABTS */
+ NVMET_FCOP_BA_ACC = 6, /* send BA_ACC */
+ NVMET_FCOP_BA_RJT = 7, /* send BA_RJT */
+};
+
+/**
+ * struct nvmefc_tgt_fcp_req - Structure used between LLDD and NVMET-FC
+ * layer to represent the exchange context and
+ * the specific FC-NVME IU operation(s) to perform
+ * for a FC-NVME FCP IO.
+ *
+ * Structure used between LLDD and nvmet-fc layer to represent the exchange
+ * context for a FC-NVME FCP I/O operation (e.g. a nvme sqe, the sqe-related
+ * memory transfers, and its assocated cqe transfer).
+ *
+ * The structure is allocated by the LLDD whenever a FCP CMD IU is received
+ * from the FC link. The address of the structure is passed to the nvmet-fc
+ * layer via the nvmet_fc_rcv_fcp_req() call. The address of the structure
+ * will be passed back to the LLDD for the data operations and transmit of
+ * the response. The LLDD is to use the address to map back to the LLDD
+ * exchange structure which maintains information such as the targetport
+ * the FCP I/O was received on, the remote FC NVME initiator that sent the
+ * FCP I/O, and any FC exchange context. Upon completion of the FCP target
+ * operation, the address of the structure will be passed back to the FCP
+ * op done() routine, allowing the nvmet-fc layer to release dma resources.
+ * Upon completion of the done() routine for either RSP or ABORT ops, no
+ * further access will be made by the nvmet-fc layer and the LLDD can
+ * de-allocate the structure.
+ *
+ * Field initialization:
+ * At the time of the nvmet_fc_rcv_fcp_req() call, there is no content that
+ * is valid in the structure.
+ *
+ * When the structure is used for an FCP target operation, the nvmet-fc
+ * layer will fully set the fields in order to specify the scattergather
+ * list, the transfer length, as well as the done routine to be called
+ * upon compeletion of the operation. The nvmet-fc layer will also set a
+ * private pointer for its own use in the done routine.
+ *
+ * Note: the LLDD must never fail a NVMET_FCOP_ABORT request !!
+ *
+ * Values set by the NVMET-FC layer prior to calling the LLDD fcp_op
+ * entrypoint.
+ * @op: Indicates the FCP IU operation to perform (see NVMET_FCOP_xxx)
+ * @hwqid: Specifies the hw queue index (0..N-1, where N is the
+ * max_hw_queues value from the LLD's nvmet_fc_target_template)
+ * that the operation is to use.
+ * @offset: Indicates the DATA_OUT/DATA_IN payload offset to be tranferred.
+ * Field is only valid on WRITEDATA, READDATA, or READDATA_RSP ops.
+ * @timeout: amount of time, in seconds, to wait for a response from the NVME
+ * host. A value of 0 is an infinite wait.
+ * Valid only for the following ops:
+ * WRITEDATA: caps the wait for data reception
+ * READDATA_RSP & RSP: caps wait for FCP_CONF reception (if used)
+ * @transfer_length: the length, in bytes, of the DATA_OUT or DATA_IN payload
+ * that is to be transferred.
+ * Valid only for the WRITEDATA, READDATA, or READDATA_RSP ops.
+ * @ba_rjt: Contains the BA_RJT payload that is to be transferred.
+ * Valid only for the NVMET_FCOP_BA_RJT op.
+ * @sg: Scatter/gather list for the DATA_OUT/DATA_IN payload data.
+ * Valid only for the WRITEDATA, READDATA, or READDATA_RSP ops.
+ * @sg_cnt: Number of valid entries in the scatter/gather list.
+ * Valid only for the WRITEDATA, READDATA, or READDATA_RSP ops.
+ * @rspaddr: pointer to the FCP RSP IU buffer to be transmit
+ * Used by RSP and READDATA_RSP ops
+ * @rspdma: PCI DMA address of the FCP RSP IU buffer
+ * Used by RSP and READDATA_RSP ops
+ * @rsplen: Length, in bytes, of the FCP RSP IU buffer
+ * Used by RSP and READDATA_RSP ops
+ * @done: The callback routine the LLDD is to invoke upon completion of
+ * the operation. req argument is the pointer to the original
+ * FCP subsystem op request.
+ * @nvmet_fc_private: pointer to an internal NVMET-FC layer structure used
+ * as part of the NVMET-FC processing. The LLDD is not to
+ * reference this field.
+ *
+ * Values set by the LLDD indicating completion status of the FCP operation.
+ * Must be set prior to calling the done() callback.
+ * @transferred_length: amount of DATA_OUT payload data received by a
+ * a WRITEDATA operation. If not a WRITEDATA operation, value must
+ * be set to 0. Should equal transfer_length on success.
+ * @fcp_error: status of the FCP operation. Must be 0 on success; on failure
+ * must be a NVME_SC_FC_xxxx value.
+ */
+struct nvmefc_tgt_fcp_req {
+ u8 op;
+ u16 hwqid;
+ u32 offset;
+ u32 timeout;
+ u32 transfer_length;
+ struct fc_ba_rjt ba_rjt;
+ struct scatterlist sg[NVME_FC_MAX_SEGMENTS];
+ int sg_cnt;
+ void *rspaddr;
+ dma_addr_t rspdma;
+ u16 rsplen;
+
+ void (*done)(struct nvmefc_tgt_fcp_req *);
+
+ void *nvmet_fc_private; /* LLDD is not to access !! */
+
+ u32 transferred_length;
+ int fcp_error;
+};
+
+
+/* Target Features (Bit fields) LLDD supports */
+enum {
+ NVMET_FCTGTFEAT_READDATA_RSP = (1 << 0),
+ /* Bit 0: supports the NVMET_FCPOP_READDATA_RSP op, which
+ * sends (the last) Read Data sequence followed by the RSP
+ * sequence in one LLDD operation. Errors during Data
+ * sequence transmit must not allow RSP sequence to be sent.
+ */
+ NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED = (1 << 1),
+ /* Bit 1: When 0, the LLDD will deliver FCP CMD
+ * on the CPU it should be affinitized to. Thus work will
+ * be scheduled on the cpu received on. When 1, the LLDD
+ * may not deliver the CMD on the CPU it should be worked
+ * on. The transport should pick a cpu to schedule the work
+ * on.
+ */
+};
+
+
+/**
+ * struct nvmet_fc_target_port - structure used between NVME-FC transport and
+ * a LLDD to reference a local NVME subsystem port.
+ * Allocated/created by the nvme_fc_register_targetport()
+ * transport interface.
+ *
+ * Fields with static values for the port. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_num: NVME-FC transport subsytem port number
+ * @node_name: FC WWNN for the port
+ * @port_name: FC WWPN for the port
+ * @private: pointer to memory allocated alongside the local port
+ * structure that is specifically for the LLDD to use.
+ * The length of the buffer corresponds to the target_priv_sz
+ * value specified in the nvme_fc_target_template supplied by
+ * the LLDD.
+ *
+ * Fields with dynamic values. Values may change base on link state. LLDD
+ * may reference fields directly to change them. Initialized by the
+ * port_info struct supplied to the registration call.
+ * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must
+ * be set to 0.
+ * @port_state: Operational state of the port.
+ */
+struct nvmet_fc_target_port {
+ /* static/read-only fields */
+ u32 port_num;
+ u64 node_name;
+ u64 port_name;
+
+ void *private;
+
+ /* dynamic fields */
+ u32 port_id;
+ enum nvme_fc_obj_state port_state;
+} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
+
+
+/**
+ * struct nvmet_fc_target_template - structure containing static entrypoints
+ * and operational parameters for an LLDD that supports NVME
+ * subsystem behavior. Passed by reference in port
+ * registrations. NVME-FC transport remembers template
+ * reference and may access it during runtime operation.
+ *
+ * Subsystem/Target Transport Entrypoints/Parameters:
+ *
+ * @targetport_delete: The LLDD initiates deletion of a targetport via
+ * nvmet_fc_unregister_targetport(). However, the teardown is
+ * asynchronous. This routine is called upon the completion of the
+ * teardown to inform the LLDD that the targetport has been deleted.
+ * Entrypoint is Mandatory.
+ *
+ * @xmt_ls_rsp: Called to transmit the response to a FC-NVME FC-4 LS service.
+ * The nvmefc_tgt_ls_req structure is the same LLDD-supplied exchange
+ * structure specified in the nvmet_fc_rcv_ls_req() call made when
+ * the LS request was received. The structure will fully describe
+ * the buffers for the response payload and the dma address of the
+ * payload. The LLDD is to transmit the response (or return a non-zero
+ * errno status), and upon completion of the transmit, call the
+ * "done" routine specified in the nvmefc_tgt_ls_req structure
+ * (argument to done is the ls reqwuest structure itself).
+ * After calling the done routine, the LLDD shall consider the
+ * LS handling complete and the nvmefc_tgt_ls_req structure may
+ * be freed/released.
+ * Entrypoint is Mandatory.
+ *
+ * @fcp_op: Called to perform a data transfer, transmit a response, or
+ * abort an FCP opertion. The nvmefc_tgt_fcp_req structure is the same
+ * LLDD-supplied exchange structure specified in the
+ * nvmet_fc_rcv_fcp_req() call made when the FCP CMD IU was received.
+ * The op field in the structure shall indicate the operation for
+ * the LLDD to perform relative to the io.
+ * NVMET_FCOP_READDATA operation: the LLDD is to send the
+ * payload data (described by sglist) to the host in 1 or
+ * more FC sequences (preferrably 1). Note: the fc-nvme layer
+ * may call the READDATA operation multiple times for longer
+ * payloads.
+ * NVMET_FCOP_WRITEDATA operation: the LLDD is to receive the
+ * payload data (described by sglist) from the host via 1 or
+ * more FC sequences (preferrably 1). The LLDD is to generate
+ * the XFER_RDY IU(s) corresponding to the data being requested.
+ * Note: the FC-NVME layer may call the WRITEDATA operation
+ * multiple times for longer payloads.
+ * NVMET_FCOP_READDATA_RSP operation: the LLDD is to send the
+ * payload data (described by sglist) to the host in 1 or
+ * more FC sequences (preferrably 1). If an error occurs during
+ * payload data transmission, the LLDD is to set the
+ * nvmefc_tgt_fcp_req fcp_error and transferred_length field, then
+ * consider the operation complete. On error, the LLDD is to not
+ * transmit the FCP_RSP iu. If all payload data is transferred
+ * successfully, the LLDD is to update the nvmefc_tgt_fcp_req
+ * transferred_length field and may subsequently transmit the
+ * FCP_RSP iu payload (described by rspbuf, rspdma, rsplen).
+ * The LLDD is to await FCP_CONF reception to confirm the RSP
+ * reception by the host. The LLDD may retramsit the FCP_RSP iu
+ * if necessary per FC-NVME. Upon reception of FCP_CONF, or upon
+ * FCP_CONF failure, the LLDD is to set the nvmefc_tgt_fcp_req
+ * fcp_error field and consider the operation complete..
+ * NVMET_FCOP_RSP: the LLDD is to transmit the FCP_RSP iu payload
+ * (described by rspbuf, rspdma, rsplen). The LLDD is to await
+ * FCP_CONF reception to confirm the RSP reception by the host.
+ * The LLDD may retramsit the FCP_RSP iu if necessary per FC-NVME.
+ * Upon reception of FCP_CONF, or upon FCP_CONF failure, the
+ * LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and
+ * consider the operation complete..
+ * NVMET_FCOP_ABORT: the LLDD is to terminate the exchange
+ * corresponding to the fcp operation. The LLDD shall send
+ * ABTS and follow FC exchange abort-multi rules, including
+ * ABTS retries and possible logout.
+ * Upon completing the indicated operation, the LLDD is to set the
+ * status fields for the operation (tranferred_length and fcp_error
+ * status) in the request, then all the "done" routine
+ * indicated in the fcp request. Upon return from the "done"
+ * routine for either a NVMET_FCOP_RSP or NVMET_FCOP_ABORT operation
+ * the fc-nvme layer will not longer reference the fcp request,
+ * allowing the LLDD to free/release the fcp request.
+ * Note: when calling the done routine for READDATA or WRITEDATA
+ * operations, the fc-nvme layer may immediate convert, in the same
+ * thread and before returning to the LLDD, the fcp operation to
+ * the next operation for the fcp io and call the LLDDs fcp_op
+ * call again. If fields in the fcp request are to be accessed post
+ * the done call, the LLDD should save their values prior to calling
+ * the done routine, and inspect the save values after the done
+ * routine.
+ * Returns 0 on success, -<errno> on failure (Ex: -EIO)
+ * Entrypoint is Mandatory.
+ *
+ * @max_hw_queues: indicates the maximum number of hw queues the LLDD
+ * supports for cpu affinitization.
+ * Value is Mandatory. Must be at least 1.
+ *
+ * @max_sgl_segments: indicates the maximum number of sgl segments supported
+ * by the LLDD
+ * Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @max_dif_sgl_segments: indicates the maximum number of sgl segments
+ * supported by the LLDD for DIF operations.
+ * Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @dma_boundary: indicates the dma address boundary where dma mappings
+ * will be split across.
+ * Value is Mandatory. Typical value is 0xFFFFFFFF to split across
+ * 4Gig address boundarys
+ *
+ * @target_features: The LLDD sets bits in this field to correspond to
+ * optional features that are supported by the LLDD.
+ * Refer to the NVMET_FCTGTFEAT_xxx values.
+ * Value is Mandatory. Allowed to be zero.
+ *
+ * @target_priv_sz: The LLDD sets this field to the amount of additional
+ * memory that it would like fc nvme layer to allocate on the LLDD's
+ * behalf whenever a targetport is allocated. The additional memory
+ * area solely for the of the LLDD and its location is specified by
+ * the targetport->private pointer.
+ * Value is Mandatory. Allowed to be zero.
+ */
+struct nvmet_fc_target_template {
+ void (*targetport_delete)(struct nvmet_fc_target_port *tgtport);
+ int (*xmt_ls_rsp)(struct nvmet_fc_target_port *tgtport,
+ struct nvmefc_tgt_ls_req *tls_req);
+ int (*fcp_op)(struct nvmet_fc_target_port *tgtport,
+ struct nvmefc_tgt_fcp_req *);
+
+ u32 max_hw_queues;
+ u16 max_sgl_segments;
+ u16 max_dif_sgl_segments;
+ u64 dma_boundary;
+
+ u32 target_features;
+
+ u32 target_priv_sz;
+};
+
+
+int nvmet_fc_register_targetport(struct nvmet_fc_port_info *portinfo,
+ struct nvmet_fc_target_template *template,
+ struct device *dev,
+ struct nvmet_fc_target_port **tgtport_p);
+
+int nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *tgtport);
+
+int nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *tgtport,
+ struct nvmefc_tgt_ls_req *lsreq,
+ void *lsreqbuf, u32 lsreqbuf_len);
+
+int nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *tgtport,
+ struct nvmefc_tgt_fcp_req *fcpreq,
+ void *cmdiubuf, u32 cmdiubuf_len);
+
+#endif /* _NVME_FC_DRIVER_H */
diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
new file mode 100644
index 000000000000..4b45226bd604
--- /dev/null
+++ b/include/linux/nvme-fc.h
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2016 Avago Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful.
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
+ * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO
+ * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID.
+ * See the GNU General Public License for more details, a copy of which
+ * can be found in the file COPYING included with this package
+ *
+ */
+
+/*
+ * This file contains definitions relative to FC-NVME r1.11 and a few
+ * newer items
+ */
+
+#ifndef _NVME_FC_H
+#define _NVME_FC_H 1
+
+
+#define NVME_CMD_SCSI_ID 0xFD
+#define NVME_CMD_FC_ID FC_TYPE_NVME
+
+/* FC-NVME Cmd IU Flags */
+#define FCNVME_CMD_FLAGS_DIRMASK 0x03
+#define FCNVME_CMD_FLAGS_WRITE 0x01
+#define FCNVME_CMD_FLAGS_READ 0x02
+
+struct nvme_fc_cmd_iu {
+ __u8 scsi_id;
+ __u8 fc_id;
+ __be16 iu_len;
+ __u8 rsvd4[3];
+ __u8 flags;
+ __be64 connection_id;
+ __be32 csn;
+ __be32 data_len;
+ struct nvme_command sqe;
+ __be32 rsvd88[2];
+};
+
+#define NVME_FC_SIZEOF_ZEROS_RSP 12
+
+struct nvme_fc_ersp_iu {
+ __u8 rsvd0[2];
+ __be16 iu_len;
+ __be32 rsn;
+ __be32 xfrd_len;
+ __be32 rsvd12;
+ struct nvme_completion cqe;
+ /* for now - no additional payload */
+};
+
+
+/* FC-NVME r1.03/16-119v0 NVME Link Services */
+enum {
+ FCNVME_LS_RSVD = 0,
+ FCNVME_LS_RJT = 1,
+ FCNVME_LS_ACC = 2,
+ FCNVME_LS_CREATE_ASSOCIATION = 3,
+ FCNVME_LS_CREATE_CONNECTION = 4,
+ FCNVME_LS_DISCONNECT = 5,
+};
+
+/* FC-NVME r1.03/16-119v0 NVME Link Service Descriptors */
+enum {
+ FCNVME_LSDESC_RSVD = 0x0,
+ FCNVME_LSDESC_RQST = 0x1,
+ FCNVME_LSDESC_RJT = 0x2,
+ FCNVME_LSDESC_CREATE_ASSOC_CMD = 0x3,
+ FCNVME_LSDESC_CREATE_CONN_CMD = 0x4,
+ FCNVME_LSDESC_DISCONN_CMD = 0x5,
+ FCNVME_LSDESC_CONN_ID = 0x6,
+ FCNVME_LSDESC_ASSOC_ID = 0x7,
+};
+
+
+/* ********** start of Link Service Descriptors ********** */
+
+
+/*
+ * fills in length of a descriptor. Struture minus descriptor header
+ */
+static inline __be32 fcnvme_lsdesc_len(size_t sz)
+{
+ return cpu_to_be32(sz - (2 * sizeof(u32)));
+}
+
+
+struct fcnvme_ls_rqst_w0 {
+ u8 ls_cmd; /* FCNVME_LS_xxx */
+ u8 zeros[3];
+};
+
+/* FCNVME_LSDESC_RQST */
+struct fcnvme_lsdesc_rqst {
+ __be32 desc_tag; /* FCNVME_LSDESC_xxx */
+ __be32 desc_len;
+ struct fcnvme_ls_rqst_w0 w0;
+ __be32 rsvd12;
+};
+
+
+
+
+/* FCNVME_LSDESC_RJT */
+struct fcnvme_lsdesc_rjt {
+ __be32 desc_tag; /* FCNVME_LSDESC_xxx */
+ __be32 desc_len;
+ u8 rsvd8;
+
+ /*
+ * Reject reason and explanaction codes are generic
+ * to ELs's from LS-3.
+ */
+ u8 reason_code;
+ u8 reason_explanation;
+
+ u8 vendor;
+ __be32 rsvd12;
+};
+
+
+#define FCNVME_ASSOC_HOSTID_LEN 64
+#define FCNVME_ASSOC_HOSTNQN_LEN 256
+#define FCNVME_ASSOC_SUBNQN_LEN 256
+
+/* FCNVME_LSDESC_CREATE_ASSOC_CMD */
+struct fcnvme_lsdesc_cr_assoc_cmd {
+ __be32 desc_tag; /* FCNVME_LSDESC_xxx */
+ __be32 desc_len;
+ __be16 ersp_ratio;
+ __be16 rsvd10;
+ __be32 rsvd12[9];
+ __be16 cntlid;
+ __be16 sqsize;
+ __be32 rsvd52;
+ u8 hostid[FCNVME_ASSOC_HOSTID_LEN];
+ u8 hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
+ u8 subnqn[FCNVME_ASSOC_SUBNQN_LEN];
+ u8 rsvd632[384];
+};
+
+/* FCNVME_LSDESC_CREATE_CONN_CMD */
+struct fcnvme_lsdesc_cr_conn_cmd {
+ __be32 desc_tag; /* FCNVME_LSDESC_xxx */
+ __be32 desc_len;
+ __be16 ersp_ratio;
+ __be16 rsvd10;
+ __be32 rsvd12[9];
+ __be16 qid;
+ __be16 sqsize;
+ __be32 rsvd52;
+};
+
+/* Disconnect Scope Values */
+enum {
+ FCNVME_DISCONN_ASSOCIATION = 0,
+ FCNVME_DISCONN_CONNECTION = 1,
+};
+
+/* FCNVME_LSDESC_DISCONN_CMD */
+struct fcnvme_lsdesc_disconn_cmd {
+ __be32 desc_tag; /* FCNVME_LSDESC_xxx */
+ __be32 desc_len;
+ u8 rsvd8[3];
+ /* note: scope is really a 1 bit field */
+ u8 scope; /* FCNVME_DISCONN_xxx */
+ __be32 rsvd12;
+ __be64 id;
+};
+
+/* FCNVME_LSDESC_CONN_ID */
+struct fcnvme_lsdesc_conn_id {
+ __be32 desc_tag; /* FCNVME_LSDESC_xxx */
+ __be32 desc_len;
+ __be64 connection_id;
+};
+
+/* FCNVME_LSDESC_ASSOC_ID */
+struct fcnvme_lsdesc_assoc_id {
+ __be32 desc_tag; /* FCNVME_LSDESC_xxx */
+ __be32 desc_len;
+ __be64 association_id;
+};
+
+/* r_ctl values */
+enum {
+ FCNVME_RS_RCTL_DATA = 1,
+ FCNVME_RS_RCTL_XFER_RDY = 5,
+ FCNVME_RS_RCTL_RSP = 8,
+};
+
+
+/* ********** start of Link Services ********** */
+
+
+/* FCNVME_LS_RJT */
+struct fcnvme_ls_rjt {
+ struct fcnvme_ls_rqst_w0 w0;
+ __be32 desc_list_len;
+ struct fcnvme_lsdesc_rqst rqst;
+ struct fcnvme_lsdesc_rjt rjt;
+};
+
+/* FCNVME_LS_ACC */
+struct fcnvme_ls_acc_hdr {
+ struct fcnvme_ls_rqst_w0 w0;
+ __be32 desc_list_len;
+ struct fcnvme_lsdesc_rqst rqst;
+ /* Followed by cmd-specific ACC descriptors, see next definitions */
+};
+
+/* FCNVME_LS_CREATE_ASSOCIATION */
+struct fcnvme_ls_cr_assoc_rqst {
+ struct fcnvme_ls_rqst_w0 w0;
+ __be32 desc_list_len;
+ struct fcnvme_lsdesc_cr_assoc_cmd assoc_cmd;
+};
+
+struct fcnvme_ls_cr_assoc_acc {
+ struct fcnvme_ls_acc_hdr hdr;
+ struct fcnvme_lsdesc_assoc_id associd;
+ struct fcnvme_lsdesc_conn_id connectid;
+};
+
+
+/* FCNVME_LS_CREATE_CONNECTION */
+struct fcnvme_ls_cr_conn_rqst {
+ struct fcnvme_ls_rqst_w0 w0;
+ __be32 desc_list_len;
+ struct fcnvme_lsdesc_assoc_id associd;
+ struct fcnvme_lsdesc_cr_conn_cmd connect_cmd;
+};
+
+struct fcnvme_ls_cr_conn_acc {
+ struct fcnvme_ls_acc_hdr hdr;
+ struct fcnvme_lsdesc_conn_id connectid;
+};
+
+/* FCNVME_LS_DISCONNECT */
+struct fcnvme_ls_disconnect_rqst {
+ struct fcnvme_ls_rqst_w0 w0;
+ __be32 desc_list_len;
+ struct fcnvme_lsdesc_assoc_id associd;
+ struct fcnvme_lsdesc_disconn_cmd discon_cmd;
+};
+
+struct fcnvme_ls_disconnect_acc {
+ struct fcnvme_ls_acc_hdr hdr;
+};
+
+
+/*
+ * Yet to be defined in FC-NVME:
+ */
+#define NVME_FC_CONNECT_TIMEOUT_SEC 2 /* 2 seconds */
+#define NVME_FC_LS_TIMEOUT_SEC 2 /* 2 seconds */
+#define NVME_FC_TGTOP_TIMEOUT_SEC 2 /* 2 seconds */
+
+
+#endif /* _NVME_FC_H */
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index fc3c24206593..3d1c6f1b15c9 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -242,6 +242,7 @@ enum {
NVME_CTRL_ONCS_COMPARE = 1 << 0,
NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
NVME_CTRL_ONCS_DSM = 1 << 2,
+ NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
NVME_CTRL_VWC_PRESENT = 1 << 0,
};
@@ -558,6 +559,23 @@ struct nvme_dsm_range {
__le64 slba;
};
+struct nvme_write_zeroes_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2;
+ __le64 metadata;
+ union nvme_data_ptr dptr;
+ __le64 slba;
+ __le16 length;
+ __le16 control;
+ __le32 dsmgmt;
+ __le32 reftag;
+ __le16 apptag;
+ __le16 appmask;
+};
+
/* Admin commands */
enum nvme_admin_opcode {
@@ -857,6 +875,7 @@ struct nvme_command {
struct nvme_download_firmware dlfw;
struct nvme_format_cmd format;
struct nvme_dsm_cmd dsm;
+ struct nvme_write_zeroes_cmd write_zeroes;
struct nvme_abort_cmd abort;
struct nvme_get_log_page_command get_log_page;
struct nvmf_common_command fabrics;
@@ -947,6 +966,7 @@ enum {
NVME_SC_BAD_ATTRIBUTES = 0x180,
NVME_SC_INVALID_PI = 0x181,
NVME_SC_READ_ONLY = 0x182,
+ NVME_SC_ONCS_NOT_SUPPORTED = 0x183,
/*
* I/O Command Set Specific - Fabrics commands:
@@ -973,17 +993,30 @@ enum {
NVME_SC_UNWRITTEN_BLOCK = 0x287,
NVME_SC_DNR = 0x4000,
+
+
+ /*
+ * FC Transport-specific error status values for NVME commands
+ *
+ * Transport-specific status code values must be in the range 0xB0..0xBF
+ */
+
+ /* Generic FC failure - catchall */
+ NVME_SC_FC_TRANSPORT_ERROR = 0x00B0,
+
+ /* I/O failure due to FC ABTS'd */
+ NVME_SC_FC_TRANSPORT_ABORTED = 0x00B1,
};
struct nvme_completion {
/*
* Used by Admin and Fabrics commands to return data:
*/
- union {
- __le16 result16;
- __le32 result;
- __le64 result64;
- };
+ union nvme_result {
+ __le16 u16;
+ __le32 u32;
+ __le64 u64;
+ } result;
__le16 sq_head; /* how much of this queue may be reclaimed */
__le16 sq_id; /* submission queue that generated this entry */
__u16 command_id; /* of the command which completed */
diff --git a/include/linux/parser.h b/include/linux/parser.h
index 39d5b7955b23..884c1e6eb3fe 100644
--- a/include/linux/parser.h
+++ b/include/linux/parser.h
@@ -27,6 +27,7 @@ typedef struct {
int match_token(char *, const match_table_t table, substring_t args[]);
int match_int(substring_t *, int *result);
+int match_u64(substring_t *, u64 *result);
int match_octal(substring_t *, int *result);
int match_hex(substring_t *, int *result);
bool match_wildcard(const char *pattern, const char *str);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 09b212d37f1d..09f4be179ff3 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -319,6 +319,9 @@ extern int kswapd_run(int nid);
extern void kswapd_stop(int nid);
#ifdef CONFIG_SWAP
+
+#include <linux/blk_types.h> /* for bio_end_io_t */
+
/* linux/mm/page_io.c */
extern int swap_readpage(struct page *);
extern int swap_writepage(struct page *page, struct writeback_control *wbc);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6e22b544d039..d5aba1512b8b 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -125,7 +125,7 @@ static inline bool iter_is_iovec(const struct iov_iter *i)
*
* The ?: is just for type safety.
*/
-#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & RW_MASK)
+#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE))
/*
* Cap the iov_iter by given limit; note that the second argument is
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 797100e10010..c78f9f0920b5 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -9,6 +9,9 @@
#include <linux/fs.h>
#include <linux/flex_proportions.h>
#include <linux/backing-dev-defs.h>
+#include <linux/blk_types.h>
+
+struct bio;
DECLARE_PER_CPU(int, dirty_throttle_leaks);
@@ -100,6 +103,16 @@ struct writeback_control {
#endif
};
+static inline int wbc_to_write_flags(struct writeback_control *wbc)
+{
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ return REQ_SYNC;
+ else if (wbc->for_kupdate || wbc->for_background)
+ return REQ_BACKGROUND;
+
+ return 0;
+}
+
/*
* A wb_domain represents a domain that wb's (bdi_writeback's) belong to
* and are measured against each other in. There always is one global