summaryrefslogtreecommitdiffstats
path: root/drivers/s390
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-02 18:50:09 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-02 18:50:09 +0200
commitb68e7e952f24527de62f4768b1cead91f92f5f6e (patch)
treec9c1dbc333becac5396eaef4d5971d3f4ca337e3 /drivers/s390
parentMerge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/... (diff)
parentMerge branch 's390forkvm' of git://git.kernel.org/pub/scm/linux/kernel/git/kv... (diff)
downloadlinux-b68e7e952f24527de62f4768b1cead91f92f5f6e.tar.xz
linux-b68e7e952f24527de62f4768b1cead91f92f5f6e.zip
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Martin Schwidefsky: - three merges for KVM/s390 with changes for vfio-ccw and cpacf. The patches are included in the KVM tree as well, let git sort it out. - add the new 'trng' random number generator - provide the secure key verification API for the pkey interface - introduce the z13 cpu counters to perf - add a new system call to set up the guarded storage facility - simplify TASK_SIZE and arch_get_unmapped_area - export the raw STSI data related to CPU topology to user space - ... and the usual churn of bug-fixes and cleanups. * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (74 commits) s390/crypt: use the correct module alias for paes_s390. s390/cpacf: Introduce kma instruction s390/cpacf: query instructions use unique parameters for compatibility with KMA s390/trng: Introduce s390 TRNG device driver. s390/crypto: Provide s390 specific arch random functionality. s390/crypto: Add new subfunctions to the cpacf PRNO function. s390/crypto: Renaming PPNO to PRNO. s390/pageattr: avoid unnecessary page table splitting s390/mm: simplify arch_get_unmapped_area[_topdown] s390/mm: make TASK_SIZE independent from the number of page table levels s390/gs: add regset for the guarded storage broadcast control block s390/kvm: Add use_cmma field to mm_context_t s390/kvm: Add PGSTE manipulation functions vfio: ccw: improve error handling for vfio_ccw_mdev_remove vfio: ccw: remove unnecessary NULL checks of a pointer s390/spinlock: remove compare and delay instruction s390/spinlock: use atomic primitives for spinlocks s390/cpumf: simplify detection of guest samples s390/pci: remove forward declaration s390/pci: increase the PCI_NR_FUNCTIONS default ...
Diffstat (limited to 'drivers/s390')
-rw-r--r--drivers/s390/block/dasd_3990_erp.c5
-rw-r--r--drivers/s390/block/dasd_eckd.c16
-rw-r--r--drivers/s390/block/dasd_int.h2
-rw-r--r--drivers/s390/cio/Makefile3
-rw-r--r--drivers/s390/cio/cio.c69
-rw-r--r--drivers/s390/cio/cio.h1
-rw-r--r--drivers/s390/cio/device_fsm.c54
-rw-r--r--drivers/s390/cio/vfio_ccw_cp.c842
-rw-r--r--drivers/s390/cio/vfio_ccw_cp.h42
-rw-r--r--drivers/s390/cio/vfio_ccw_drv.c308
-rw-r--r--drivers/s390/cio/vfio_ccw_fsm.c203
-rw-r--r--drivers/s390/cio/vfio_ccw_ops.c425
-rw-r--r--drivers/s390/cio/vfio_ccw_private.h96
-rw-r--r--drivers/s390/crypto/pkey_api.c64
14 files changed, 2077 insertions, 53 deletions
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index 774da20ceb58..107cd3361e29 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -1052,8 +1052,9 @@ dasd_3990_erp_com_rej(struct dasd_ccw_req * erp, char *sense)
} else {
/* fatal error - set status to FAILED
internal error 09 - Command Reject */
- dev_err(&device->cdev->dev, "An error occurred in the DASD "
- "device driver, reason=%s\n", "09");
+ if (!test_bit(DASD_CQR_SUPPRESS_CR, &erp->flags))
+ dev_err(&device->cdev->dev,
+ "An error occurred in the DASD device driver, reason=09\n");
erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
}
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 0b38217f8147..122456e4db89 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -4927,10 +4927,14 @@ static void dasd_eckd_dump_sense(struct dasd_device *device,
dasd_eckd_dump_sense_tcw(device, req, irb);
} else {
/*
- * In some cases the 'No Record Found' error might be expected
- * and log messages shouldn't be written then. Check if the
- * according suppress bit is set.
+ * In some cases the 'Command Reject' or 'No Record Found'
+ * error might be expected and log messages shouldn't be
+ * written then. Check if the according suppress bit is set.
*/
+ if (sense && sense[0] & SNS0_CMD_REJECT &&
+ test_bit(DASD_CQR_SUPPRESS_CR, &req->flags))
+ return;
+
if (sense && sense[1] & SNS1_NO_REC_FOUND &&
test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags))
return;
@@ -5172,6 +5176,10 @@ static int dasd_eckd_query_host_access(struct dasd_device *device,
if (!device->block && private->lcu->pav == HYPER_PAV)
return -EOPNOTSUPP;
+ /* may not be supported by the storage server */
+ if (!(private->features.feature[14] & 0x80))
+ return -EOPNOTSUPP;
+
cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
sizeof(struct dasd_psf_prssd_data) + 1,
device);
@@ -5219,6 +5227,8 @@ static int dasd_eckd_query_host_access(struct dasd_device *device,
cqr->buildclk = get_tod_clock();
cqr->status = DASD_CQR_FILLED;
+ /* the command might not be supported, suppress error message */
+ __set_bit(DASD_CQR_SUPPRESS_CR, &cqr->flags);
rc = dasd_sleep_on_interruptible(cqr);
if (rc == 0) {
*data = *host_access;
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 518dba2732d5..dca7cb1e6f65 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -239,11 +239,11 @@ struct dasd_ccw_req {
*/
/*
* The following flags are used to suppress output of certain errors.
- * These flags should only be used for format checks!
*/
#define DASD_CQR_SUPPRESS_NRF 4 /* Suppress 'No Record Found' error */
#define DASD_CQR_SUPPRESS_FP 5 /* Suppress 'File Protected' error*/
#define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */
+#define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */
/* Signature for error recovery functions. */
typedef struct dasd_ccw_req *(*dasd_erp_fn_t) (struct dasd_ccw_req *);
diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
index 3ab9aedeb84a..bdf47526038a 100644
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -17,3 +17,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
obj-$(CONFIG_QDIO) += qdio.o
+
+vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
+obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 1b350665c823..89216174fcbb 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -170,12 +170,14 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */
return ccode;
}
}
+EXPORT_SYMBOL_GPL(cio_start_key);
int
cio_start (struct subchannel *sch, struct ccw1 *cpa, __u8 lpm)
{
return cio_start_key(sch, cpa, lpm, PAGE_DEFAULT_KEY);
}
+EXPORT_SYMBOL_GPL(cio_start);
/*
* resume suspended I/O operation
@@ -208,6 +210,7 @@ cio_resume (struct subchannel *sch)
return -ENODEV;
}
}
+EXPORT_SYMBOL_GPL(cio_resume);
/*
* halt I/O operation
@@ -241,6 +244,7 @@ cio_halt(struct subchannel *sch)
return -ENODEV;
}
}
+EXPORT_SYMBOL_GPL(cio_halt);
/*
* Clear I/O operation
@@ -271,6 +275,7 @@ cio_clear(struct subchannel *sch)
return -ENODEV;
}
}
+EXPORT_SYMBOL_GPL(cio_clear);
/*
* Function: cio_cancel
@@ -308,7 +313,68 @@ cio_cancel (struct subchannel *sch)
return -ENODEV;
}
}
+EXPORT_SYMBOL_GPL(cio_cancel);
+/**
+ * cio_cancel_halt_clear - Cancel running I/O by performing cancel, halt
+ * and clear ordinally if subchannel is valid.
+ * @sch: subchannel on which to perform the cancel_halt_clear operation
+ * @iretry: the number of the times remained to retry the next operation
+ *
+ * This should be called repeatedly since halt/clear are asynchronous
+ * operations. We do one try with cio_cancel, three tries with cio_halt,
+ * 255 tries with cio_clear. The caller should initialize @iretry with
+ * the value 255 for its first call to this, and keep using the same
+ * @iretry in the subsequent calls until it gets a non -EBUSY return.
+ *
+ * Returns 0 if device now idle, -ENODEV for device not operational,
+ * -EBUSY if an interrupt is expected (either from halt/clear or from a
+ * status pending), and -EIO if out of retries.
+ */
+int cio_cancel_halt_clear(struct subchannel *sch, int *iretry)
+{
+ int ret;
+
+ if (cio_update_schib(sch))
+ return -ENODEV;
+ if (!sch->schib.pmcw.ena)
+ /* Not operational -> done. */
+ return 0;
+ /* Stage 1: cancel io. */
+ if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_HALT_PEND) &&
+ !(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
+ if (!scsw_is_tm(&sch->schib.scsw)) {
+ ret = cio_cancel(sch);
+ if (ret != -EINVAL)
+ return ret;
+ }
+ /*
+ * Cancel io unsuccessful or not applicable (transport mode).
+ * Continue with asynchronous instructions.
+ */
+ *iretry = 3; /* 3 halt retries. */
+ }
+ /* Stage 2: halt io. */
+ if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
+ if (*iretry) {
+ *iretry -= 1;
+ ret = cio_halt(sch);
+ if (ret != -EBUSY)
+ return (ret == 0) ? -EBUSY : ret;
+ }
+ /* Halt io unsuccessful. */
+ *iretry = 255; /* 255 clear retries. */
+ }
+ /* Stage 3: clear io. */
+ if (*iretry) {
+ *iretry -= 1;
+ ret = cio_clear(sch);
+ return (ret == 0) ? -EBUSY : ret;
+ }
+ /* Function was unsuccessful */
+ return -EIO;
+}
+EXPORT_SYMBOL_GPL(cio_cancel_halt_clear);
static void cio_apply_config(struct subchannel *sch, struct schib *schib)
{
@@ -382,6 +448,7 @@ int cio_commit_config(struct subchannel *sch)
}
return ret;
}
+EXPORT_SYMBOL_GPL(cio_commit_config);
/**
* cio_update_schib - Perform stsch and update schib if subchannel is valid.
@@ -987,6 +1054,7 @@ int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key)
return cio_start_handle_notoper(sch, lpm);
}
}
+EXPORT_SYMBOL_GPL(cio_tm_start_key);
/**
* cio_tm_intrg - perform interrogate function
@@ -1012,3 +1080,4 @@ int cio_tm_intrg(struct subchannel *sch)
return -ENODEV;
}
}
+EXPORT_SYMBOL_GPL(cio_tm_intrg);
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index f0e57aefb5f2..939596d81b73 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -123,6 +123,7 @@ extern int cio_enable_subchannel(struct subchannel *, u32);
extern int cio_disable_subchannel (struct subchannel *);
extern int cio_cancel (struct subchannel *);
extern int cio_clear (struct subchannel *);
+extern int cio_cancel_halt_clear(struct subchannel *, int *);
extern int cio_resume (struct subchannel *);
extern int cio_halt (struct subchannel *);
extern int cio_start (struct subchannel *, struct ccw1 *, __u8);
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 9afb5ce13007..12016e32e519 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -124,14 +124,6 @@ ccw_device_set_timeout(struct ccw_device *cdev, int expires)
add_timer(&cdev->private->timer);
}
-/*
- * Cancel running i/o. This is called repeatedly since halt/clear are
- * asynchronous operations. We do one try with cio_cancel, two tries
- * with cio_halt, 255 tries with cio_clear. If everythings fails panic.
- * Returns 0 if device now idle, -ENODEV for device not operational and
- * -EBUSY if an interrupt is expected (either from halt/clear or from a
- * status pending).
- */
int
ccw_device_cancel_halt_clear(struct ccw_device *cdev)
{
@@ -139,44 +131,14 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev)
int ret;
sch = to_subchannel(cdev->dev.parent);
- if (cio_update_schib(sch))
- return -ENODEV;
- if (!sch->schib.pmcw.ena)
- /* Not operational -> done. */
- return 0;
- /* Stage 1: cancel io. */
- if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_HALT_PEND) &&
- !(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
- if (!scsw_is_tm(&sch->schib.scsw)) {
- ret = cio_cancel(sch);
- if (ret != -EINVAL)
- return ret;
- }
- /* cancel io unsuccessful or not applicable (transport mode).
- * Continue with asynchronous instructions. */
- cdev->private->iretry = 3; /* 3 halt retries. */
- }
- if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
- /* Stage 2: halt io. */
- if (cdev->private->iretry) {
- cdev->private->iretry--;
- ret = cio_halt(sch);
- if (ret != -EBUSY)
- return (ret == 0) ? -EBUSY : ret;
- }
- /* halt io unsuccessful. */
- cdev->private->iretry = 255; /* 255 clear retries. */
- }
- /* Stage 3: clear io. */
- if (cdev->private->iretry) {
- cdev->private->iretry--;
- ret = cio_clear (sch);
- return (ret == 0) ? -EBUSY : ret;
- }
- /* Function was unsuccessful */
- CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n",
- cdev->private->dev_id.ssid, cdev->private->dev_id.devno);
- return -EIO;
+ ret = cio_cancel_halt_clear(sch, &cdev->private->iretry);
+
+ if (ret == -EIO)
+ CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n",
+ cdev->private->dev_id.ssid,
+ cdev->private->dev_id.devno);
+
+ return ret;
}
void ccw_device_update_sense_data(struct ccw_device *cdev)
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
new file mode 100644
index 000000000000..ba6ac83a6c25
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -0,0 +1,842 @@
+/*
+ * channel program interfaces
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/vfio.h>
+#include <asm/idals.h>
+
+#include "vfio_ccw_cp.h"
+
+/*
+ * Max length for ccw chain.
+ * XXX: Limit to 256, need to check more?
+ */
+#define CCWCHAIN_LEN_MAX 256
+
+struct pfn_array {
+ unsigned long pa_iova;
+ unsigned long *pa_iova_pfn;
+ unsigned long *pa_pfn;
+ int pa_nr;
+};
+
+struct pfn_array_table {
+ struct pfn_array *pat_pa;
+ int pat_nr;
+};
+
+struct ccwchain {
+ struct list_head next;
+ struct ccw1 *ch_ccw;
+ /* Guest physical address of the current chain. */
+ u64 ch_iova;
+ /* Count of the valid ccws in chain. */
+ int ch_len;
+ /* Pinned PAGEs for the original data. */
+ struct pfn_array_table *ch_pat;
+};
+
+/*
+ * pfn_array_pin() - pin user pages in memory
+ * @pa: pfn_array on which to perform the operation
+ * @mdev: the mediated device to perform pin/unpin operations
+ *
+ * Attempt to pin user pages in memory.
+ *
+ * Usage of pfn_array:
+ * @pa->pa_iova starting guest physical I/O address. Assigned by caller.
+ * @pa->pa_iova_pfn array that stores PFNs of the pages need to pin. Allocated
+ * by caller.
+ * @pa->pa_pfn array that receives PFNs of the pages pinned. Allocated by
+ * caller.
+ * @pa->pa_nr number of pages from @pa->pa_iova to pin. Assigned by
+ * caller.
+ * number of pages pinned. Assigned by callee.
+ *
+ * Returns:
+ * Number of pages pinned on success.
+ * If @pa->pa_nr is 0 or negative, returns 0.
+ * If no pages were pinned, returns -errno.
+ */
+static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
+{
+ int i, ret;
+
+ if (pa->pa_nr <= 0) {
+ pa->pa_nr = 0;
+ return 0;
+ }
+
+ pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
+ for (i = 1; i < pa->pa_nr; i++)
+ pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1;
+
+ ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
+ IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
+
+ if (ret > 0 && ret != pa->pa_nr) {
+ vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret);
+ pa->pa_nr = 0;
+ return 0;
+ }
+
+ return ret;
+}
+
+/* Unpin the pages before releasing the memory. */
+static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
+{
+ vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
+ pa->pa_nr = 0;
+ kfree(pa->pa_iova_pfn);
+}
+
+/* Alloc memory for PFNs, then pin pages with them. */
+static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
+ u64 iova, unsigned int len)
+{
+ int ret = 0;
+
+ if (!len || pa->pa_nr)
+ return -EINVAL;
+
+ pa->pa_iova = iova;
+
+ pa->pa_nr = ((iova & ~PAGE_MASK) + len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+ if (!pa->pa_nr)
+ return -EINVAL;
+
+ pa->pa_iova_pfn = kcalloc(pa->pa_nr,
+ sizeof(*pa->pa_iova_pfn) +
+ sizeof(*pa->pa_pfn),
+ GFP_KERNEL);
+ if (unlikely(!pa->pa_iova_pfn))
+ return -ENOMEM;
+ pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;
+
+ ret = pfn_array_pin(pa, mdev);
+
+ if (ret > 0)
+ return ret;
+ else if (!ret)
+ ret = -EINVAL;
+
+ kfree(pa->pa_iova_pfn);
+
+ return ret;
+}
+
+static int pfn_array_table_init(struct pfn_array_table *pat, int nr)
+{
+ pat->pat_pa = kcalloc(nr, sizeof(*pat->pat_pa), GFP_KERNEL);
+ if (unlikely(ZERO_OR_NULL_PTR(pat->pat_pa))) {
+ pat->pat_nr = 0;
+ return -ENOMEM;
+ }
+
+ pat->pat_nr = nr;
+
+ return 0;
+}
+
+static void pfn_array_table_unpin_free(struct pfn_array_table *pat,
+ struct device *mdev)
+{
+ int i;
+
+ for (i = 0; i < pat->pat_nr; i++)
+ pfn_array_unpin_free(pat->pat_pa + i, mdev);
+
+ if (pat->pat_nr) {
+ kfree(pat->pat_pa);
+ pat->pat_pa = NULL;
+ pat->pat_nr = 0;
+ }
+}
+
+static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat,
+ unsigned long iova)
+{
+ struct pfn_array *pa = pat->pat_pa;
+ unsigned long iova_pfn = iova >> PAGE_SHIFT;
+ int i, j;
+
+ for (i = 0; i < pat->pat_nr; i++, pa++)
+ for (j = 0; j < pa->pa_nr; j++)
+ if (pa->pa_iova_pfn[i] == iova_pfn)
+ return true;
+
+ return false;
+}
+/* Create the list idal words for a pfn_array_table. */
+static inline void pfn_array_table_idal_create_words(
+ struct pfn_array_table *pat,
+ unsigned long *idaws)
+{
+ struct pfn_array *pa;
+ int i, j, k;
+
+ /*
+ * Idal words (execept the first one) rely on the memory being 4k
+ * aligned. If a user virtual address is 4K aligned, then it's
+ * corresponding kernel physical address will also be 4K aligned. Thus
+ * there will be no problem here to simply use the phys to create an
+ * idaw.
+ */
+ k = 0;
+ for (i = 0; i < pat->pat_nr; i++) {
+ pa = pat->pat_pa + i;
+ for (j = 0; j < pa->pa_nr; j++) {
+ idaws[k] = pa->pa_pfn[j] << PAGE_SHIFT;
+ if (k == 0)
+ idaws[k] += pa->pa_iova & (PAGE_SIZE - 1);
+ k++;
+ }
+ }
+}
+
+
+/*
+ * Within the domain (@mdev), copy @n bytes from a guest physical
+ * address (@iova) to a host physical address (@to).
+ */
+static long copy_from_iova(struct device *mdev,
+ void *to, u64 iova,
+ unsigned long n)
+{
+ struct pfn_array pa = {0};
+ u64 from;
+ int i, ret;
+ unsigned long l, m;
+
+ ret = pfn_array_alloc_pin(&pa, mdev, iova, n);
+ if (ret <= 0)
+ return ret;
+
+ l = n;
+ for (i = 0; i < pa.pa_nr; i++) {
+ from = pa.pa_pfn[i] << PAGE_SHIFT;
+ m = PAGE_SIZE;
+ if (i == 0) {
+ from += iova & (PAGE_SIZE - 1);
+ m -= iova & (PAGE_SIZE - 1);
+ }
+
+ m = min(l, m);
+ memcpy(to + (n - l), (void *)from, m);
+
+ l -= m;
+ if (l == 0)
+ break;
+ }
+
+ pfn_array_unpin_free(&pa, mdev);
+
+ return l;
+}
+
+static long copy_ccw_from_iova(struct channel_program *cp,
+ struct ccw1 *to, u64 iova,
+ unsigned long len)
+{
+ struct ccw0 ccw0;
+ struct ccw1 *pccw1;
+ int ret;
+ int i;
+
+ ret = copy_from_iova(cp->mdev, to, iova, len * sizeof(struct ccw1));
+ if (ret)
+ return ret;
+
+ if (!cp->orb.cmd.fmt) {
+ pccw1 = to;
+ for (i = 0; i < len; i++) {
+ ccw0 = *(struct ccw0 *)pccw1;
+ if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
+ pccw1->cmd_code = CCW_CMD_TIC;
+ pccw1->flags = 0;
+ pccw1->count = 0;
+ } else {
+ pccw1->cmd_code = ccw0.cmd_code;
+ pccw1->flags = ccw0.flags;
+ pccw1->count = ccw0.count;
+ }
+ pccw1->cda = ccw0.cda;
+ pccw1++;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Helpers to operate ccwchain.
+ */
+#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
+
+#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
+
+#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
+
+#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
+
+
+#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
+
+static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
+{
+ struct ccwchain *chain;
+ void *data;
+ size_t size;
+
+ /* Make ccw address aligned to 8. */
+ size = ((sizeof(*chain) + 7L) & -8L) +
+ sizeof(*chain->ch_ccw) * len +
+ sizeof(*chain->ch_pat) * len;
+ chain = kzalloc(size, GFP_DMA | GFP_KERNEL);
+ if (!chain)
+ return NULL;
+
+ data = (u8 *)chain + ((sizeof(*chain) + 7L) & -8L);
+ chain->ch_ccw = (struct ccw1 *)data;
+
+ data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len;
+ chain->ch_pat = (struct pfn_array_table *)data;
+
+ chain->ch_len = len;
+
+ list_add_tail(&chain->next, &cp->ccwchain_list);
+
+ return chain;
+}
+
+static void ccwchain_free(struct ccwchain *chain)
+{
+ list_del(&chain->next);
+ kfree(chain);
+}
+
+/* Free resource for a ccw that allocated memory for its cda. */
+static void ccwchain_cda_free(struct ccwchain *chain, int idx)
+{
+ struct ccw1 *ccw = chain->ch_ccw + idx;
+
+ if (!ccw->count)
+ return;
+
+ kfree((void *)(u64)ccw->cda);
+}
+
+/* Unpin the pages then free the memory resources. */
+static void cp_unpin_free(struct channel_program *cp)
+{
+ struct ccwchain *chain, *temp;
+ int i;
+
+ list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
+ for (i = 0; i < chain->ch_len; i++) {
+ pfn_array_table_unpin_free(chain->ch_pat + i,
+ cp->mdev);
+ ccwchain_cda_free(chain, i);
+ }
+ ccwchain_free(chain);
+ }
+}
+
+/**
+ * ccwchain_calc_length - calculate the length of the ccw chain.
+ * @iova: guest physical address of the target ccw chain
+ * @cp: channel_program on which to perform the operation
+ *
+ * This is the chain length not considering any TICs.
+ * You need to do a new round for each TIC target.
+ *
+ * Returns: the length of the ccw chain or -errno.
+ */
+static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
+{
+ struct ccw1 *ccw, *p;
+ int cnt;
+
+ /*
+ * Copy current chain from guest to host kernel.
+ * Currently the chain length is limited to CCWCHAIN_LEN_MAX (256).
+ * So copying 2K is enough (safe).
+ */
+ p = ccw = kcalloc(CCWCHAIN_LEN_MAX, sizeof(*ccw), GFP_KERNEL);
+ if (!ccw)
+ return -ENOMEM;
+
+ cnt = copy_ccw_from_iova(cp, ccw, iova, CCWCHAIN_LEN_MAX);
+ if (cnt) {
+ kfree(ccw);
+ return cnt;
+ }
+
+ cnt = 0;
+ do {
+ cnt++;
+
+ if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw)))
+ break;
+
+ ccw++;
+ } while (cnt < CCWCHAIN_LEN_MAX + 1);
+
+ if (cnt == CCWCHAIN_LEN_MAX + 1)
+ cnt = -EINVAL;
+
+ kfree(p);
+ return cnt;
+}
+
+static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
+{
+ struct ccwchain *chain;
+ u32 ccw_head, ccw_tail;
+
+ list_for_each_entry(chain, &cp->ccwchain_list, next) {
+ ccw_head = chain->ch_iova;
+ ccw_tail = ccw_head + (chain->ch_len - 1) * sizeof(struct ccw1);
+
+ if ((ccw_head <= tic->cda) && (tic->cda <= ccw_tail))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int ccwchain_loop_tic(struct ccwchain *chain,
+ struct channel_program *cp);
+
+static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp)
+{
+ struct ccwchain *chain;
+ int len, ret;
+
+ /* May transfer to an existing chain. */
+ if (tic_target_chain_exists(tic, cp))
+ return 0;
+
+ /* Get chain length. */
+ len = ccwchain_calc_length(tic->cda, cp);
+ if (len < 0)
+ return len;
+
+ /* Need alloc a new chain for this one. */
+ chain = ccwchain_alloc(cp, len);
+ if (!chain)
+ return -ENOMEM;
+ chain->ch_iova = tic->cda;
+
+ /* Copy the new chain from user. */
+ ret = copy_ccw_from_iova(cp, chain->ch_ccw, tic->cda, len);
+ if (ret) {
+ ccwchain_free(chain);
+ return ret;
+ }
+
+ /* Loop for tics on this new chain. */
+ return ccwchain_loop_tic(chain, cp);
+}
+
+/* Loop for TICs. */
+static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
+{
+ struct ccw1 *tic;
+ int i, ret;
+
+ for (i = 0; i < chain->ch_len; i++) {
+ tic = chain->ch_ccw + i;
+
+ if (!ccw_is_tic(tic))
+ continue;
+
+ ret = ccwchain_handle_tic(tic, cp);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int ccwchain_fetch_tic(struct ccwchain *chain,
+ int idx,
+ struct channel_program *cp)
+{
+ struct ccw1 *ccw = chain->ch_ccw + idx;
+ struct ccwchain *iter;
+ u32 ccw_head, ccw_tail;
+
+ list_for_each_entry(iter, &cp->ccwchain_list, next) {
+ ccw_head = iter->ch_iova;
+ ccw_tail = ccw_head + (iter->ch_len - 1) * sizeof(struct ccw1);
+
+ if ((ccw_head <= ccw->cda) && (ccw->cda <= ccw_tail)) {
+ ccw->cda = (__u32) (addr_t) (iter->ch_ccw +
+ (ccw->cda - ccw_head));
+ return 0;
+ }
+ }
+
+ return -EFAULT;
+}
+
+static int ccwchain_fetch_direct(struct ccwchain *chain,
+ int idx,
+ struct channel_program *cp)
+{
+ struct ccw1 *ccw;
+ struct pfn_array_table *pat;
+ unsigned long *idaws;
+ int idaw_nr;
+
+ ccw = chain->ch_ccw + idx;
+
+ /*
+ * Pin data page(s) in memory.
+ * The number of pages actually is the count of the idaws which will be
+ * needed when translating a direct ccw to a idal ccw.
+ */
+ pat = chain->ch_pat + idx;
+ if (pfn_array_table_init(pat, 1))
+ return -ENOMEM;
+ idaw_nr = pfn_array_alloc_pin(pat->pat_pa, cp->mdev,
+ ccw->cda, ccw->count);
+ if (idaw_nr < 0)
+ return idaw_nr;
+
+ /* Translate this direct ccw to a idal ccw. */
+ idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
+ if (!idaws) {
+ pfn_array_table_unpin_free(pat, cp->mdev);
+ return -ENOMEM;
+ }
+ ccw->cda = (__u32) virt_to_phys(idaws);
+ ccw->flags |= CCW_FLAG_IDA;
+
+ pfn_array_table_idal_create_words(pat, idaws);
+
+ return 0;
+}
+
+static int ccwchain_fetch_idal(struct ccwchain *chain,
+ int idx,
+ struct channel_program *cp)
+{
+ struct ccw1 *ccw;
+ struct pfn_array_table *pat;
+ unsigned long *idaws;
+ u64 idaw_iova;
+ unsigned int idaw_nr, idaw_len;
+ int i, ret;
+
+ ccw = chain->ch_ccw + idx;
+
+ /* Calculate size of idaws. */
+ ret = copy_from_iova(cp->mdev, &idaw_iova, ccw->cda, sizeof(idaw_iova));
+ if (ret)
+ return ret;
+ idaw_nr = idal_nr_words((void *)(idaw_iova), ccw->count);
+ idaw_len = idaw_nr * sizeof(*idaws);
+
+ /* Pin data page(s) in memory. */
+ pat = chain->ch_pat + idx;
+ ret = pfn_array_table_init(pat, idaw_nr);
+ if (ret)
+ return ret;
+
+ /* Translate idal ccw to use new allocated idaws. */
+ idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL);
+ if (!idaws) {
+ ret = -ENOMEM;
+ goto out_unpin;
+ }
+
+ ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idaw_len);
+ if (ret)
+ goto out_free_idaws;
+
+ ccw->cda = virt_to_phys(idaws);
+
+ for (i = 0; i < idaw_nr; i++) {
+ idaw_iova = *(idaws + i);
+ if (IS_ERR_VALUE(idaw_iova)) {
+ ret = -EFAULT;
+ goto out_free_idaws;
+ }
+
+ ret = pfn_array_alloc_pin(pat->pat_pa + i, cp->mdev,
+ idaw_iova, 1);
+ if (ret < 0)
+ goto out_free_idaws;
+ }
+
+ pfn_array_table_idal_create_words(pat, idaws);
+
+ return 0;
+
+out_free_idaws:
+ kfree(idaws);
+out_unpin:
+ pfn_array_table_unpin_free(pat, cp->mdev);
+ return ret;
+}
+
+/*
+ * Fetch one ccw.
+ * To reduce memory copy, we'll pin the cda page in memory,
+ * and to get rid of the cda 2G limitiaion of ccw1, we'll translate
+ * direct ccws to idal ccws.
+ */
+static int ccwchain_fetch_one(struct ccwchain *chain,
+ int idx,
+ struct channel_program *cp)
+{
+ struct ccw1 *ccw = chain->ch_ccw + idx;
+
+ if (ccw_is_test(ccw) || ccw_is_noop(ccw))
+ return 0;
+
+ if (ccw_is_tic(ccw))
+ return ccwchain_fetch_tic(chain, idx, cp);
+
+ if (ccw_is_idal(ccw))
+ return ccwchain_fetch_idal(chain, idx, cp);
+
+ return ccwchain_fetch_direct(chain, idx, cp);
+}
+
+/**
+ * cp_init() - allocate ccwchains for a channel program.
+ * @cp: channel_program on which to perform the operation
+ * @mdev: the mediated device to perform pin/unpin operations
+ * @orb: control block for the channel program from the guest
+ *
+ * This creates one or more ccwchain(s), and copies the raw data of
+ * the target channel program from @orb->cmd.iova to the new ccwchain(s).
+ *
+ * Limitations:
+ * 1. Supports only prefetch enabled mode.
+ * 2. Supports idal(c64) ccw chaining.
+ * 3. Supports 4k idaw.
+ *
+ * Returns:
+ * %0 on success and a negative error value on failure.
+ */
+int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
+{
+ u64 iova = orb->cmd.cpa;
+ struct ccwchain *chain;
+ int len, ret;
+
+ /*
+ * XXX:
+ * Only support prefetch enable mode now.
+ * Only support 64bit addressing idal.
+ * Only support 4k IDAW.
+ */
+ if (!orb->cmd.pfch || !orb->cmd.c64 || orb->cmd.i2k)
+ return -EOPNOTSUPP;
+
+ INIT_LIST_HEAD(&cp->ccwchain_list);
+ memcpy(&cp->orb, orb, sizeof(*orb));
+ cp->mdev = mdev;
+
+ /* Get chain length. */
+ len = ccwchain_calc_length(iova, cp);
+ if (len < 0)
+ return len;
+
+ /* Alloc mem for the head chain. */
+ chain = ccwchain_alloc(cp, len);
+ if (!chain)
+ return -ENOMEM;
+ chain->ch_iova = iova;
+
+ /* Copy the head chain from guest. */
+ ret = copy_ccw_from_iova(cp, chain->ch_ccw, iova, len);
+ if (ret) {
+ ccwchain_free(chain);
+ return ret;
+ }
+
+ /* Now loop for its TICs. */
+ ret = ccwchain_loop_tic(chain, cp);
+ if (ret)
+ cp_unpin_free(cp);
+
+ return ret;
+}
+
+
+/**
+ * cp_free() - free resources for channel program.
+ * @cp: channel_program on which to perform the operation
+ *
+ * This unpins the memory pages and frees the memory space occupied by
+ * @cp, which must have been returned by a previous call to cp_init().
+ * Otherwise, undefined behavior occurs.
+ */
+void cp_free(struct channel_program *cp)
+{
+ cp_unpin_free(cp);
+}
+
+/**
+ * cp_prefetch() - translate a guest physical address channel program to
+ * a real-device runnable channel program.
+ * @cp: channel_program on which to perform the operation
+ *
+ * This function translates the guest-physical-address channel program
+ * and stores the result to ccwchain list. @cp must have been
+ * initialized by a previous call with cp_init(). Otherwise, undefined
+ * behavior occurs.
+ *
+ * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced
+ * as helpers to do ccw chain translation inside the kernel. Basically
+ * they accept a channel program issued by a virtual machine, and
+ * translate the channel program to a real-device runnable channel
+ * program.
+ *
+ * These APIs will copy the ccws into kernel-space buffers, and update
+ * the guest phsical addresses with their corresponding host physical
+ * addresses. Then channel I/O device drivers could issue the
+ * translated channel program to real devices to perform an I/O
+ * operation.
+ *
+ * These interfaces are designed to support translation only for
+ * channel programs, which are generated and formatted by a
+ * guest. Thus this will make it possible for things like VFIO to
+ * leverage the interfaces to passthrough a channel I/O mediated
+ * device in QEMU.
+ *
+ * We support direct ccw chaining by translating them to idal ccws.
+ *
+ * Returns:
+ * %0 on success and a negative error value on failure.
+ */
+int cp_prefetch(struct channel_program *cp)
+{
+ struct ccwchain *chain;
+ int len, idx, ret;
+
+ list_for_each_entry(chain, &cp->ccwchain_list, next) {
+ len = chain->ch_len;
+ for (idx = 0; idx < len; idx++) {
+ ret = ccwchain_fetch_one(chain, idx, cp);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * cp_get_orb() - get the orb of the channel program
+ * @cp: channel_program on which to perform the operation
+ * @intparm: new intparm for the returned orb
+ * @lpm: candidate value of the logical-path mask for the returned orb
+ *
+ * This function returns the address of the updated orb of the channel
+ * program. Channel I/O device drivers could use this orb to issue a
+ * ssch.
+ */
+union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm)
+{
+ union orb *orb;
+ struct ccwchain *chain;
+ struct ccw1 *cpa;
+
+ orb = &cp->orb;
+
+ orb->cmd.intparm = intparm;
+ orb->cmd.fmt = 1;
+ orb->cmd.key = PAGE_DEFAULT_KEY >> 4;
+
+ if (orb->cmd.lpm == 0)
+ orb->cmd.lpm = lpm;
+
+ chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
+ cpa = chain->ch_ccw;
+ orb->cmd.cpa = (__u32) __pa(cpa);
+
+ return orb;
+}
+
+/**
+ * cp_update_scsw() - update scsw for a channel program.
+ * @cp: channel_program on which to perform the operation
+ * @scsw: I/O results of the channel program and also the target to be
+ * updated
+ *
+ * @scsw contains the I/O results of the channel program that pointed
+ * to by @cp. However what @scsw->cpa stores is a host physical
+ * address, which is meaningless for the guest, which is waiting for
+ * the I/O results.
+ *
+ * This function updates @scsw->cpa to its coressponding guest physical
+ * address.
+ */
+void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
+{
+ struct ccwchain *chain;
+ u32 cpa = scsw->cmd.cpa;
+ u32 ccw_head, ccw_tail;
+
+ /*
+ * LATER:
+ * For now, only update the cmd.cpa part. We may need to deal with
+ * other portions of the schib as well, even if we don't return them
+ * in the ioctl directly. Path status changes etc.
+ */
+ list_for_each_entry(chain, &cp->ccwchain_list, next) {
+ ccw_head = (u32)(u64)chain->ch_ccw;
+ ccw_tail = (u32)(u64)(chain->ch_ccw + chain->ch_len - 1);
+
+ if ((ccw_head <= cpa) && (cpa <= ccw_tail)) {
+ /*
+ * (cpa - ccw_head) is the offset value of the host
+ * physical ccw to its chain head.
+ * Adding this value to the guest physical ccw chain
+ * head gets us the guest cpa.
+ */
+ cpa = chain->ch_iova + (cpa - ccw_head);
+ break;
+ }
+ }
+
+ scsw->cmd.cpa = cpa;
+}
+
+/**
+ * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
+ * @cmd: ccwchain command on which to perform the operation
+ * @iova: the iova to check
+ *
+ * If the @iova is currently pinned for the ccw chain, return true;
+ * else return false.
+ */
+bool cp_iova_pinned(struct channel_program *cp, u64 iova)
+{
+ struct ccwchain *chain;
+ int i;
+
+ list_for_each_entry(chain, &cp->ccwchain_list, next) {
+ for (i = 0; i < chain->ch_len; i++)
+ if (pfn_array_table_iova_pinned(chain->ch_pat + i,
+ iova))
+ return true;
+ }
+
+ return false;
+}
diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h
new file mode 100644
index 000000000000..7a1996b3b36d
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_cp.h
@@ -0,0 +1,42 @@
+/*
+ * channel program interfaces
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#ifndef _VFIO_CCW_CP_H_
+#define _VFIO_CCW_CP_H_
+
+#include <asm/cio.h>
+#include <asm/scsw.h>
+
+#include "orb.h"
+
+/**
+ * struct channel_program - manage information for channel program
+ * @ccwchain_list: list head of ccwchains
+ * @orb: orb for the currently processed ssch request
+ * @mdev: the mediated device to perform page pinning/unpinning
+ *
+ * @ccwchain_list is the head of a ccwchain list, that contents the
+ * translated result of the guest channel program that pointed out by
+ * the iova parameter when calling cp_init.
+ */
+struct channel_program {
+ struct list_head ccwchain_list;
+ union orb orb;
+ struct device *mdev;
+};
+
+extern int cp_init(struct channel_program *cp, struct device *mdev,
+ union orb *orb);
+extern void cp_free(struct channel_program *cp);
+extern int cp_prefetch(struct channel_program *cp);
+extern union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm);
+extern void cp_update_scsw(struct channel_program *cp, union scsw *scsw);
+extern bool cp_iova_pinned(struct channel_program *cp, u64 iova);
+
+#endif
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
new file mode 100644
index 000000000000..e90dd43d2a55
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -0,0 +1,308 @@
+/*
+ * VFIO based Physical Subchannel device driver
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/mdev.h>
+
+#include <asm/isc.h>
+
+#include "ioasm.h"
+#include "css.h"
+#include "vfio_ccw_private.h"
+
+struct workqueue_struct *vfio_ccw_work_q;
+
+/*
+ * Helpers
+ */
+int vfio_ccw_sch_quiesce(struct subchannel *sch)
+{
+ struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+ DECLARE_COMPLETION_ONSTACK(completion);
+ int iretry, ret = 0;
+
+ spin_lock_irq(sch->lock);
+ if (!sch->schib.pmcw.ena)
+ goto out_unlock;
+ ret = cio_disable_subchannel(sch);
+ if (ret != -EBUSY)
+ goto out_unlock;
+
+ do {
+ iretry = 255;
+
+ ret = cio_cancel_halt_clear(sch, &iretry);
+ while (ret == -EBUSY) {
+ /*
+ * Flush all I/O and wait for
+ * cancel/halt/clear completion.
+ */
+ private->completion = &completion;
+ spin_unlock_irq(sch->lock);
+
+ wait_for_completion_timeout(&completion, 3*HZ);
+
+ spin_lock_irq(sch->lock);
+ private->completion = NULL;
+ flush_workqueue(vfio_ccw_work_q);
+ ret = cio_cancel_halt_clear(sch, &iretry);
+ };
+
+ ret = cio_disable_subchannel(sch);
+ } while (ret == -EBUSY);
+out_unlock:
+ private->state = VFIO_CCW_STATE_NOT_OPER;
+ spin_unlock_irq(sch->lock);
+ return ret;
+}
+
+static void vfio_ccw_sch_io_todo(struct work_struct *work)
+{
+ struct vfio_ccw_private *private;
+ struct subchannel *sch;
+ struct irb *irb;
+
+ private = container_of(work, struct vfio_ccw_private, io_work);
+ irb = &private->irb;
+ sch = private->sch;
+
+ if (scsw_is_solicited(&irb->scsw)) {
+ cp_update_scsw(&private->cp, &irb->scsw);
+ cp_free(&private->cp);
+ }
+ memcpy(private->io_region.irb_area, irb, sizeof(*irb));
+
+ if (private->io_trigger)
+ eventfd_signal(private->io_trigger, 1);
+
+ if (private->mdev)
+ private->state = VFIO_CCW_STATE_IDLE;
+}
+
+/*
+ * Sysfs interfaces
+ */
+static ssize_t chpids_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct subchannel *sch = to_subchannel(dev);
+ struct chsc_ssd_info *ssd = &sch->ssd_info;
+ ssize_t ret = 0;
+ int chp;
+ int mask;
+
+ for (chp = 0; chp < 8; chp++) {
+ mask = 0x80 >> chp;
+ if (ssd->path_mask & mask)
+ ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id);
+ else
+ ret += sprintf(buf + ret, "00 ");
+ }
+ ret += sprintf(buf+ret, "\n");
+ return ret;
+}
+
+static ssize_t pimpampom_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct subchannel *sch = to_subchannel(dev);
+ struct pmcw *pmcw = &sch->schib.pmcw;
+
+ return sprintf(buf, "%02x %02x %02x\n",
+ pmcw->pim, pmcw->pam, pmcw->pom);
+}
+
+static DEVICE_ATTR(chpids, 0444, chpids_show, NULL);
+static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL);
+
+static struct attribute *vfio_subchannel_attrs[] = {
+ &dev_attr_chpids.attr,
+ &dev_attr_pimpampom.attr,
+ NULL,
+};
+
+static struct attribute_group vfio_subchannel_attr_group = {
+ .attrs = vfio_subchannel_attrs,
+};
+
+/*
+ * Css driver callbacks
+ */
+static void vfio_ccw_sch_irq(struct subchannel *sch)
+{
+ struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+
+ inc_irq_stat(IRQIO_CIO);
+ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
+}
+
+static int vfio_ccw_sch_probe(struct subchannel *sch)
+{
+ struct pmcw *pmcw = &sch->schib.pmcw;
+ struct vfio_ccw_private *private;
+ int ret;
+
+ if (pmcw->qf) {
+ dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
+ dev_name(&sch->dev));
+ return -ENODEV;
+ }
+
+ private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
+ if (!private)
+ return -ENOMEM;
+ private->sch = sch;
+ dev_set_drvdata(&sch->dev, private);
+
+ spin_lock_irq(sch->lock);
+ private->state = VFIO_CCW_STATE_NOT_OPER;
+ sch->isc = VFIO_CCW_ISC;
+ ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
+ spin_unlock_irq(sch->lock);
+ if (ret)
+ goto out_free;
+
+ ret = sysfs_create_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
+ if (ret)
+ goto out_disable;
+
+ ret = vfio_ccw_mdev_reg(sch);
+ if (ret)
+ goto out_rm_group;
+
+ INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
+ atomic_set(&private->avail, 1);
+ private->state = VFIO_CCW_STATE_STANDBY;
+
+ return 0;
+
+out_rm_group:
+ sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
+out_disable:
+ cio_disable_subchannel(sch);
+out_free:
+ dev_set_drvdata(&sch->dev, NULL);
+ kfree(private);
+ return ret;
+}
+
+static int vfio_ccw_sch_remove(struct subchannel *sch)
+{
+ struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+
+ vfio_ccw_sch_quiesce(sch);
+
+ vfio_ccw_mdev_unreg(sch);
+
+ sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
+
+ dev_set_drvdata(&sch->dev, NULL);
+
+ kfree(private);
+
+ return 0;
+}
+
+static void vfio_ccw_sch_shutdown(struct subchannel *sch)
+{
+ vfio_ccw_sch_quiesce(sch);
+}
+
+/**
+ * vfio_ccw_sch_event - process subchannel event
+ * @sch: subchannel
+ * @process: non-zero if function is called in process context
+ *
+ * An unspecified event occurred for this subchannel. Adjust data according
+ * to the current operational state of the subchannel. Return zero when the
+ * event has been handled sufficiently or -EAGAIN when this function should
+ * be called again in process context.
+ */
+static int vfio_ccw_sch_event(struct subchannel *sch, int process)
+{
+ struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+ unsigned long flags;
+
+ spin_lock_irqsave(sch->lock, flags);
+ if (!device_is_registered(&sch->dev))
+ goto out_unlock;
+
+ if (work_pending(&sch->todo_work))
+ goto out_unlock;
+
+ if (cio_update_schib(sch)) {
+ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
+ goto out_unlock;
+ }
+
+ private = dev_get_drvdata(&sch->dev);
+ if (private->state == VFIO_CCW_STATE_NOT_OPER) {
+ private->state = private->mdev ? VFIO_CCW_STATE_IDLE :
+ VFIO_CCW_STATE_STANDBY;
+ }
+
+out_unlock:
+ spin_unlock_irqrestore(sch->lock, flags);
+
+ return 0;
+}
+
+static struct css_device_id vfio_ccw_sch_ids[] = {
+ { .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, },
+ { /* end of list */ },
+};
+MODULE_DEVICE_TABLE(css, vfio_ccw_sch_ids);
+
+static struct css_driver vfio_ccw_sch_driver = {
+ .drv = {
+ .name = "vfio_ccw",
+ .owner = THIS_MODULE,
+ },
+ .subchannel_type = vfio_ccw_sch_ids,
+ .irq = vfio_ccw_sch_irq,
+ .probe = vfio_ccw_sch_probe,
+ .remove = vfio_ccw_sch_remove,
+ .shutdown = vfio_ccw_sch_shutdown,
+ .sch_event = vfio_ccw_sch_event,
+};
+
+static int __init vfio_ccw_sch_init(void)
+{
+ int ret;
+
+ vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw");
+ if (!vfio_ccw_work_q)
+ return -ENOMEM;
+
+ isc_register(VFIO_CCW_ISC);
+ ret = css_driver_register(&vfio_ccw_sch_driver);
+ if (ret) {
+ isc_unregister(VFIO_CCW_ISC);
+ destroy_workqueue(vfio_ccw_work_q);
+ }
+
+ return ret;
+}
+
+static void __exit vfio_ccw_sch_exit(void)
+{
+ css_driver_unregister(&vfio_ccw_sch_driver);
+ isc_unregister(VFIO_CCW_ISC);
+ destroy_workqueue(vfio_ccw_work_q);
+}
+module_init(vfio_ccw_sch_init);
+module_exit(vfio_ccw_sch_exit);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
new file mode 100644
index 000000000000..80a0559cd7ce
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -0,0 +1,203 @@
+/*
+ * Finite state machine for vfio-ccw device handling
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ */
+
+#include <linux/vfio.h>
+#include <linux/mdev.h>
+
+#include "ioasm.h"
+#include "vfio_ccw_private.h"
+
+static int fsm_io_helper(struct vfio_ccw_private *private)
+{
+ struct subchannel *sch;
+ union orb *orb;
+ int ccode;
+ __u8 lpm;
+ unsigned long flags;
+
+ sch = private->sch;
+
+ spin_lock_irqsave(sch->lock, flags);
+ private->state = VFIO_CCW_STATE_BUSY;
+ spin_unlock_irqrestore(sch->lock, flags);
+
+ orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm);
+
+ /* Issue "Start Subchannel" */
+ ccode = ssch(sch->schid, orb);
+
+ switch (ccode) {
+ case 0:
+ /*
+ * Initialize device status information
+ */
+ sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND;
+ return 0;
+ case 1: /* Status pending */
+ case 2: /* Busy */
+ return -EBUSY;
+ case 3: /* Device/path not operational */
+ {
+ lpm = orb->cmd.lpm;
+ if (lpm != 0)
+ sch->lpm &= ~lpm;
+ else
+ sch->lpm = 0;
+
+ if (cio_update_schib(sch))
+ return -ENODEV;
+
+ return sch->lpm ? -EACCES : -ENODEV;
+ }
+ default:
+ return ccode;
+ }
+}
+
+static void fsm_notoper(struct vfio_ccw_private *private,
+ enum vfio_ccw_event event)
+{
+ struct subchannel *sch = private->sch;
+
+ /*
+ * TODO:
+ * Probably we should send the machine check to the guest.
+ */
+ css_sched_sch_todo(sch, SCH_TODO_UNREG);
+ private->state = VFIO_CCW_STATE_NOT_OPER;
+}
+
+/*
+ * No operation action.
+ */
+static void fsm_nop(struct vfio_ccw_private *private,
+ enum vfio_ccw_event event)
+{
+}
+
+static void fsm_io_error(struct vfio_ccw_private *private,
+ enum vfio_ccw_event event)
+{
+ pr_err("vfio-ccw: FSM: I/O request from state:%d\n", private->state);
+ private->io_region.ret_code = -EIO;
+}
+
+static void fsm_io_busy(struct vfio_ccw_private *private,
+ enum vfio_ccw_event event)
+{
+ private->io_region.ret_code = -EBUSY;
+}
+
+static void fsm_disabled_irq(struct vfio_ccw_private *private,
+ enum vfio_ccw_event event)
+{
+ struct subchannel *sch = private->sch;
+
+ /*
+ * An interrupt in a disabled state means a previous disable was not
+ * successful - should not happen, but we try to disable again.
+ */
+ cio_disable_subchannel(sch);
+}
+
+/*
+ * Deal with the ccw command request from the userspace.
+ */
+static void fsm_io_request(struct vfio_ccw_private *private,
+ enum vfio_ccw_event event)
+{
+ union orb *orb;
+ union scsw *scsw = &private->scsw;
+ struct ccw_io_region *io_region = &private->io_region;
+ struct mdev_device *mdev = private->mdev;
+
+ private->state = VFIO_CCW_STATE_BOXED;
+
+ memcpy(scsw, io_region->scsw_area, sizeof(*scsw));
+
+ if (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) {
+ orb = (union orb *)io_region->orb_area;
+
+ io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev),
+ orb);
+ if (io_region->ret_code)
+ goto err_out;
+
+ io_region->ret_code = cp_prefetch(&private->cp);
+ if (io_region->ret_code) {
+ cp_free(&private->cp);
+ goto err_out;
+ }
+
+ /* Start channel program and wait for I/O interrupt. */
+ io_region->ret_code = fsm_io_helper(private);
+ if (io_region->ret_code) {
+ cp_free(&private->cp);
+ goto err_out;
+ }
+ return;
+ } else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
+ /* XXX: Handle halt. */
+ io_region->ret_code = -EOPNOTSUPP;
+ goto err_out;
+ } else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
+ /* XXX: Handle clear. */
+ io_region->ret_code = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+err_out:
+ private->state = VFIO_CCW_STATE_IDLE;
+}
+
+/*
+ * Got an interrupt for a normal io (state busy).
+ */
+static void fsm_irq(struct vfio_ccw_private *private,
+ enum vfio_ccw_event event)
+{
+ struct irb *irb = this_cpu_ptr(&cio_irb);
+
+ memcpy(&private->irb, irb, sizeof(*irb));
+
+ queue_work(vfio_ccw_work_q, &private->io_work);
+
+ if (private->completion)
+ complete(private->completion);
+}
+
+/*
+ * Device statemachine
+ */
+fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
+ [VFIO_CCW_STATE_NOT_OPER] = {
+ [VFIO_CCW_EVENT_NOT_OPER] = fsm_nop,
+ [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error,
+ [VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq,
+ },
+ [VFIO_CCW_STATE_STANDBY] = {
+ [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
+ [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error,
+ [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
+ },
+ [VFIO_CCW_STATE_IDLE] = {
+ [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
+ [VFIO_CCW_EVENT_IO_REQ] = fsm_io_request,
+ [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
+ },
+ [VFIO_CCW_STATE_BOXED] = {
+ [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
+ [VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy,
+ [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
+ },
+ [VFIO_CCW_STATE_BUSY] = {
+ [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
+ [VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy,
+ [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
+ },
+};
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
new file mode 100644
index 000000000000..e72abbc18ee3
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -0,0 +1,425 @@
+/*
+ * Physical device callbacks for vfio_ccw
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#include <linux/vfio.h>
+#include <linux/mdev.h>
+
+#include "vfio_ccw_private.h"
+
+static int vfio_ccw_mdev_reset(struct mdev_device *mdev)
+{
+ struct vfio_ccw_private *private;
+ struct subchannel *sch;
+ int ret;
+
+ private = dev_get_drvdata(mdev_parent_dev(mdev));
+ sch = private->sch;
+ /*
+ * TODO:
+ * In the cureent stage, some things like "no I/O running" and "no
+ * interrupt pending" are clear, but we are not sure what other state
+ * we need to care about.
+ * There are still a lot more instructions need to be handled. We
+ * should come back here later.
+ */
+ ret = vfio_ccw_sch_quiesce(sch);
+ if (ret)
+ return ret;
+
+ ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
+ if (!ret)
+ private->state = VFIO_CCW_STATE_IDLE;
+
+ return ret;
+}
+
+static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
+ unsigned long action,
+ void *data)
+{
+ struct vfio_ccw_private *private =
+ container_of(nb, struct vfio_ccw_private, nb);
+
+ /*
+ * Vendor drivers MUST unpin pages in response to an
+ * invalidation.
+ */
+ if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
+ struct vfio_iommu_type1_dma_unmap *unmap = data;
+
+ if (!cp_iova_pinned(&private->cp, unmap->iova))
+ return NOTIFY_OK;
+
+ if (vfio_ccw_mdev_reset(private->mdev))
+ return NOTIFY_BAD;
+
+ cp_free(&private->cp);
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+ return sprintf(buf, "I/O subchannel (Non-QDIO)\n");
+}
+MDEV_TYPE_ATTR_RO(name);
+
+static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
+ char *buf)
+{
+ return sprintf(buf, "%s\n", VFIO_DEVICE_API_CCW_STRING);
+}
+MDEV_TYPE_ATTR_RO(device_api);
+
+static ssize_t available_instances_show(struct kobject *kobj,
+ struct device *dev, char *buf)
+{
+ struct vfio_ccw_private *private = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", atomic_read(&private->avail));
+}
+MDEV_TYPE_ATTR_RO(available_instances);
+
+static struct attribute *mdev_types_attrs[] = {
+ &mdev_type_attr_name.attr,
+ &mdev_type_attr_device_api.attr,
+ &mdev_type_attr_available_instances.attr,
+ NULL,
+};
+
+static struct attribute_group mdev_type_group = {
+ .name = "io",
+ .attrs = mdev_types_attrs,
+};
+
+struct attribute_group *mdev_type_groups[] = {
+ &mdev_type_group,
+ NULL,
+};
+
+static int vfio_ccw_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
+{
+ struct vfio_ccw_private *private =
+ dev_get_drvdata(mdev_parent_dev(mdev));
+
+ if (private->state == VFIO_CCW_STATE_NOT_OPER)
+ return -ENODEV;
+
+ if (atomic_dec_if_positive(&private->avail) < 0)
+ return -EPERM;
+
+ private->mdev = mdev;
+ private->state = VFIO_CCW_STATE_IDLE;
+
+ return 0;
+}
+
+static int vfio_ccw_mdev_remove(struct mdev_device *mdev)
+{
+ struct vfio_ccw_private *private =
+ dev_get_drvdata(mdev_parent_dev(mdev));
+
+ if ((private->state != VFIO_CCW_STATE_NOT_OPER) &&
+ (private->state != VFIO_CCW_STATE_STANDBY)) {
+ if (!vfio_ccw_mdev_reset(mdev))
+ private->state = VFIO_CCW_STATE_STANDBY;
+ /* The state will be NOT_OPER on error. */
+ }
+
+ private->mdev = NULL;
+ atomic_inc(&private->avail);
+
+ return 0;
+}
+
+static int vfio_ccw_mdev_open(struct mdev_device *mdev)
+{
+ struct vfio_ccw_private *private =
+ dev_get_drvdata(mdev_parent_dev(mdev));
+ unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
+
+ private->nb.notifier_call = vfio_ccw_mdev_notifier;
+
+ return vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+ &events, &private->nb);
+}
+
+void vfio_ccw_mdev_release(struct mdev_device *mdev)
+{
+ struct vfio_ccw_private *private =
+ dev_get_drvdata(mdev_parent_dev(mdev));
+
+ vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+ &private->nb);
+}
+
+static ssize_t vfio_ccw_mdev_read(struct mdev_device *mdev,
+ char __user *buf,
+ size_t count,
+ loff_t *ppos)
+{
+ struct vfio_ccw_private *private;
+ struct ccw_io_region *region;
+
+ if (*ppos + count > sizeof(*region))
+ return -EINVAL;
+
+ private = dev_get_drvdata(mdev_parent_dev(mdev));
+ region = &private->io_region;
+ if (copy_to_user(buf, (void *)region + *ppos, count))
+ return -EFAULT;
+
+ return count;
+}
+
+static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
+ const char __user *buf,
+ size_t count,
+ loff_t *ppos)
+{
+ struct vfio_ccw_private *private;
+ struct ccw_io_region *region;
+
+ if (*ppos + count > sizeof(*region))
+ return -EINVAL;
+
+ private = dev_get_drvdata(mdev_parent_dev(mdev));
+ if (private->state != VFIO_CCW_STATE_IDLE)
+ return -EACCES;
+
+ region = &private->io_region;
+ if (copy_from_user((void *)region + *ppos, buf, count))
+ return -EFAULT;
+
+ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ);
+ if (region->ret_code != 0) {
+ private->state = VFIO_CCW_STATE_IDLE;
+ return region->ret_code;
+ }
+
+ return count;
+}
+
+static int vfio_ccw_mdev_get_device_info(struct vfio_device_info *info)
+{
+ info->flags = VFIO_DEVICE_FLAGS_CCW | VFIO_DEVICE_FLAGS_RESET;
+ info->num_regions = VFIO_CCW_NUM_REGIONS;
+ info->num_irqs = VFIO_CCW_NUM_IRQS;
+
+ return 0;
+}
+
+static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info,
+ u16 *cap_type_id,
+ void **cap_type)
+{
+ switch (info->index) {
+ case VFIO_CCW_CONFIG_REGION_INDEX:
+ info->offset = 0;
+ info->size = sizeof(struct ccw_io_region);
+ info->flags = VFIO_REGION_INFO_FLAG_READ
+ | VFIO_REGION_INFO_FLAG_WRITE;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
+{
+ if (info->index != VFIO_CCW_IO_IRQ_INDEX)
+ return -EINVAL;
+
+ info->count = 1;
+ info->flags = VFIO_IRQ_INFO_EVENTFD;
+
+ return 0;
+}
+
+static int vfio_ccw_mdev_set_irqs(struct mdev_device *mdev,
+ uint32_t flags,
+ void __user *data)
+{
+ struct vfio_ccw_private *private;
+ struct eventfd_ctx **ctx;
+
+ if (!(flags & VFIO_IRQ_SET_ACTION_TRIGGER))
+ return -EINVAL;
+
+ private = dev_get_drvdata(mdev_parent_dev(mdev));
+ ctx = &private->io_trigger;
+
+ switch (flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
+ case VFIO_IRQ_SET_DATA_NONE:
+ {
+ if (*ctx)
+ eventfd_signal(*ctx, 1);
+ return 0;
+ }
+ case VFIO_IRQ_SET_DATA_BOOL:
+ {
+ uint8_t trigger;
+
+ if (get_user(trigger, (uint8_t __user *)data))
+ return -EFAULT;
+
+ if (trigger && *ctx)
+ eventfd_signal(*ctx, 1);
+ return 0;
+ }
+ case VFIO_IRQ_SET_DATA_EVENTFD:
+ {
+ int32_t fd;
+
+ if (get_user(fd, (int32_t __user *)data))
+ return -EFAULT;
+
+ if (fd == -1) {
+ if (*ctx)
+ eventfd_ctx_put(*ctx);
+ *ctx = NULL;
+ } else if (fd >= 0) {
+ struct eventfd_ctx *efdctx;
+
+ efdctx = eventfd_ctx_fdget(fd);
+ if (IS_ERR(efdctx))
+ return PTR_ERR(efdctx);
+
+ if (*ctx)
+ eventfd_ctx_put(*ctx);
+
+ *ctx = efdctx;
+ } else
+ return -EINVAL;
+
+ return 0;
+ }
+ default:
+ return -EINVAL;
+ }
+}
+
+static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ int ret = 0;
+ unsigned long minsz;
+
+ switch (cmd) {
+ case VFIO_DEVICE_GET_INFO:
+ {
+ struct vfio_device_info info;
+
+ minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ ret = vfio_ccw_mdev_get_device_info(&info);
+ if (ret)
+ return ret;
+
+ return copy_to_user((void __user *)arg, &info, minsz);
+ }
+ case VFIO_DEVICE_GET_REGION_INFO:
+ {
+ struct vfio_region_info info;
+ u16 cap_type_id = 0;
+ void *cap_type = NULL;
+
+ minsz = offsetofend(struct vfio_region_info, offset);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ ret = vfio_ccw_mdev_get_region_info(&info, &cap_type_id,
+ &cap_type);
+ if (ret)
+ return ret;
+
+ return copy_to_user((void __user *)arg, &info, minsz);
+ }
+ case VFIO_DEVICE_GET_IRQ_INFO:
+ {
+ struct vfio_irq_info info;
+
+ minsz = offsetofend(struct vfio_irq_info, count);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz || info.index >= VFIO_CCW_NUM_IRQS)
+ return -EINVAL;
+
+ ret = vfio_ccw_mdev_get_irq_info(&info);
+ if (ret)
+ return ret;
+
+ if (info.count == -1)
+ return -EINVAL;
+
+ return copy_to_user((void __user *)arg, &info, minsz);
+ }
+ case VFIO_DEVICE_SET_IRQS:
+ {
+ struct vfio_irq_set hdr;
+ size_t data_size;
+ void __user *data;
+
+ minsz = offsetofend(struct vfio_irq_set, count);
+
+ if (copy_from_user(&hdr, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ ret = vfio_set_irqs_validate_and_prepare(&hdr, 1,
+ VFIO_CCW_NUM_IRQS,
+ &data_size);
+ if (ret)
+ return ret;
+
+ data = (void __user *)(arg + minsz);
+ return vfio_ccw_mdev_set_irqs(mdev, hdr.flags, data);
+ }
+ case VFIO_DEVICE_RESET:
+ return vfio_ccw_mdev_reset(mdev);
+ default:
+ return -ENOTTY;
+ }
+}
+
+static const struct mdev_parent_ops vfio_ccw_mdev_ops = {
+ .owner = THIS_MODULE,
+ .supported_type_groups = mdev_type_groups,
+ .create = vfio_ccw_mdev_create,
+ .remove = vfio_ccw_mdev_remove,
+ .open = vfio_ccw_mdev_open,
+ .release = vfio_ccw_mdev_release,
+ .read = vfio_ccw_mdev_read,
+ .write = vfio_ccw_mdev_write,
+ .ioctl = vfio_ccw_mdev_ioctl,
+};
+
+int vfio_ccw_mdev_reg(struct subchannel *sch)
+{
+ return mdev_register_device(&sch->dev, &vfio_ccw_mdev_ops);
+}
+
+void vfio_ccw_mdev_unreg(struct subchannel *sch)
+{
+ mdev_unregister_device(&sch->dev);
+}
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
new file mode 100644
index 000000000000..fc0f01c16ef9
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -0,0 +1,96 @@
+/*
+ * Private stuff for vfio_ccw driver
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#ifndef _VFIO_CCW_PRIVATE_H_
+#define _VFIO_CCW_PRIVATE_H_
+
+#include <linux/completion.h>
+#include <linux/eventfd.h>
+#include <linux/workqueue.h>
+#include <linux/vfio_ccw.h>
+
+#include "css.h"
+#include "vfio_ccw_cp.h"
+
+/**
+ * struct vfio_ccw_private
+ * @sch: pointer to the subchannel
+ * @state: internal state of the device
+ * @completion: synchronization helper of the I/O completion
+ * @avail: available for creating a mediated device
+ * @mdev: pointer to the mediated device
+ * @nb: notifier for vfio events
+ * @io_region: MMIO region to input/output I/O arguments/results
+ * @cp: channel program for the current I/O operation
+ * @irb: irb info received from interrupt
+ * @scsw: scsw info
+ * @io_trigger: eventfd ctx for signaling userspace I/O results
+ * @io_work: work for deferral process of I/O handling
+ */
+struct vfio_ccw_private {
+ struct subchannel *sch;
+ int state;
+ struct completion *completion;
+ atomic_t avail;
+ struct mdev_device *mdev;
+ struct notifier_block nb;
+ struct ccw_io_region io_region;
+
+ struct channel_program cp;
+ struct irb irb;
+ union scsw scsw;
+
+ struct eventfd_ctx *io_trigger;
+ struct work_struct io_work;
+} __aligned(8);
+
+extern int vfio_ccw_mdev_reg(struct subchannel *sch);
+extern void vfio_ccw_mdev_unreg(struct subchannel *sch);
+
+extern int vfio_ccw_sch_quiesce(struct subchannel *sch);
+
+/*
+ * States of the device statemachine.
+ */
+enum vfio_ccw_state {
+ VFIO_CCW_STATE_NOT_OPER,
+ VFIO_CCW_STATE_STANDBY,
+ VFIO_CCW_STATE_IDLE,
+ VFIO_CCW_STATE_BOXED,
+ VFIO_CCW_STATE_BUSY,
+ /* last element! */
+ NR_VFIO_CCW_STATES
+};
+
+/*
+ * Asynchronous events of the device statemachine.
+ */
+enum vfio_ccw_event {
+ VFIO_CCW_EVENT_NOT_OPER,
+ VFIO_CCW_EVENT_IO_REQ,
+ VFIO_CCW_EVENT_INTERRUPT,
+ /* last element! */
+ NR_VFIO_CCW_EVENTS
+};
+
+/*
+ * Action called through jumptable.
+ */
+typedef void (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event);
+extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
+
+static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
+ int event)
+{
+ vfio_ccw_jumptable[private->state][event](private, event);
+}
+
+extern struct workqueue_struct *vfio_ccw_work_q;
+
+#endif
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 058db724b5a2..ea86da8c75f9 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -80,7 +80,7 @@ struct secaeskeytoken {
* token. If keybitsize is given, the bitsize of the key is
* also checked. Returns 0 on success or errno value on failure.
*/
-static int check_secaeskeytoken(u8 *token, int keybitsize)
+static int check_secaeskeytoken(const u8 *token, int keybitsize)
{
struct secaeskeytoken *t = (struct secaeskeytoken *) token;
@@ -1004,6 +1004,53 @@ int pkey_skey2pkey(const struct pkey_seckey *seckey,
EXPORT_SYMBOL(pkey_skey2pkey);
/*
+ * Verify key and give back some info about the key.
+ */
+int pkey_verifykey(const struct pkey_seckey *seckey,
+ u16 *pcardnr, u16 *pdomain,
+ u16 *pkeysize, u32 *pattributes)
+{
+ struct secaeskeytoken *t = (struct secaeskeytoken *) seckey;
+ u16 cardnr, domain;
+ u64 mkvp[2];
+ int rc;
+
+ /* check the secure key for valid AES secure key */
+ rc = check_secaeskeytoken((u8 *) seckey, 0);
+ if (rc)
+ goto out;
+ if (pattributes)
+ *pattributes = PKEY_VERIFY_ATTR_AES;
+ if (pkeysize)
+ *pkeysize = t->bitsize;
+
+ /* try to find a card which can handle this key */
+ rc = pkey_findcard(seckey, &cardnr, &domain, 1);
+ if (rc)
+ goto out;
+
+ /* check mkvp for old mkvp match */
+ rc = mkvp_cache_fetch(cardnr, domain, mkvp);
+ if (rc)
+ goto out;
+ if (t->mkvp == mkvp[1]) {
+ DEBUG_DBG("pkey_verifykey secure key has old mkvp\n");
+ if (pattributes)
+ *pattributes |= PKEY_VERIFY_ATTR_OLD_MKVP;
+ }
+
+ if (pcardnr)
+ *pcardnr = cardnr;
+ if (pdomain)
+ *pdomain = domain;
+
+out:
+ DEBUG_DBG("pkey_verifykey rc=%d\n", rc);
+ return rc;
+}
+EXPORT_SYMBOL(pkey_verifykey);
+
+/*
* File io functions
*/
@@ -1104,6 +1151,21 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
return -EFAULT;
break;
}
+ case PKEY_VERIFYKEY: {
+ struct pkey_verifykey __user *uvk = (void __user *) arg;
+ struct pkey_verifykey kvk;
+
+ if (copy_from_user(&kvk, uvk, sizeof(kvk)))
+ return -EFAULT;
+ rc = pkey_verifykey(&kvk.seckey, &kvk.cardnr, &kvk.domain,
+ &kvk.keysize, &kvk.attributes);
+ DEBUG_DBG("pkey_ioctl pkey_verifykey()=%d\n", rc);
+ if (rc)
+ break;
+ if (copy_to_user(uvk, &kvk, sizeof(kvk)))
+ return -EFAULT;
+ break;
+ }
default:
/* unknown/unsupported ioctl cmd */
return -ENOTTY;