diff options
author | Alexey Kardashevskiy <aik@ozlabs.ru> | 2017-03-22 05:21:56 +0100 |
---|---|---|
committer | Paul Mackerras <paulus@ozlabs.org> | 2017-04-20 03:39:26 +0200 |
commit | 121f80ba68f1a5779a36d7b3247206e60e0a7418 (patch) | |
tree | 62c67d9c81f80ff0fb7f5c672c86f114a7460d52 /virt/kvm/vfio.c | |
parent | KVM: PPC: iommu: Unify TCE checking (diff) | |
download | linux-121f80ba68f1a5779a36d7b3247206e60e0a7418.tar.xz linux-121f80ba68f1a5779a36d7b3247206e60e0a7418.zip |
KVM: PPC: VFIO: Add in-kernel acceleration for VFIO
This allows the host kernel to handle H_PUT_TCE, H_PUT_TCE_INDIRECT
and H_STUFF_TCE requests targeted an IOMMU TCE table used for VFIO
without passing them to user space which saves time on switching
to user space and back.
This adds H_PUT_TCE/H_PUT_TCE_INDIRECT/H_STUFF_TCE handlers to KVM.
KVM tries to handle a TCE request in the real mode, if failed
it passes the request to the virtual mode to complete the operation.
If it a virtual mode handler fails, the request is passed to
the user space; this is not expected to happen though.
To avoid dealing with page use counters (which is tricky in real mode),
this only accelerates SPAPR TCE IOMMU v2 clients which are required
to pre-register the userspace memory. The very first TCE request will
be handled in the VFIO SPAPR TCE driver anyway as the userspace view
of the TCE table (iommu_table::it_userspace) is not allocated till
the very first mapping happens and we cannot call vmalloc in real mode.
If we fail to update a hardware IOMMU table unexpected reason, we just
clear it and move on as there is nothing really we can do about it -
for example, if we hot plug a VFIO device to a guest, existing TCE tables
will be mirrored automatically to the hardware and there is no interface
to report to the guest about possible failures.
This adds new attribute - KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE - to
the VFIO KVM device. It takes a VFIO group fd and SPAPR TCE table fd
and associates a physical IOMMU table with the SPAPR TCE table (which
is a guest view of the hardware IOMMU table). The iommu_table object
is cached and referenced so we do not have to look up for it in real mode.
This does not implement the UNSET counterpart as there is no use for it -
once the acceleration is enabled, the existing userspace won't
disable it unless a VFIO container is destroyed; this adds necessary
cleanup to the KVM_DEV_VFIO_GROUP_DEL handler.
This advertises the new KVM_CAP_SPAPR_TCE_VFIO capability to the user
space.
This adds real mode version of WARN_ON_ONCE() as the generic version
causes problems with rcu_sched. Since we testing what vmalloc_to_phys()
returns in the code, this also adds a check for already existing
vmalloc_to_phys() call in kvmppc_rm_h_put_tce_indirect().
This finally makes use of vfio_external_user_iommu_id() which was
introduced quite some time ago and was considered for removal.
Tests show that this patch increases transmission speed from 220MB/s
to 750..1020MB/s on 10Gb network (Chelsea CXGB3 10Gb ethernet card).
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Diffstat (limited to 'virt/kvm/vfio.c')
-rw-r--r-- | virt/kvm/vfio.c | 105 |
1 files changed, 105 insertions, 0 deletions
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index d32f239eb471..37d9118fd84b 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -20,6 +20,10 @@ #include <linux/vfio.h> #include "vfio.h" +#ifdef CONFIG_SPAPR_TCE_IOMMU +#include <asm/kvm_ppc.h> +#endif + struct kvm_vfio_group { struct list_head node; struct vfio_group *vfio_group; @@ -89,6 +93,47 @@ static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group) return ret > 0; } +#ifdef CONFIG_SPAPR_TCE_IOMMU +static int kvm_vfio_external_user_iommu_id(struct vfio_group *vfio_group) +{ + int (*fn)(struct vfio_group *); + int ret = -EINVAL; + + fn = symbol_get(vfio_external_user_iommu_id); + if (!fn) + return ret; + + ret = fn(vfio_group); + + symbol_put(vfio_external_user_iommu_id); + + return ret; +} + +static struct iommu_group *kvm_vfio_group_get_iommu_group( + struct vfio_group *group) +{ + int group_id = kvm_vfio_external_user_iommu_id(group); + + if (group_id < 0) + return NULL; + + return iommu_group_get_by_id(group_id); +} + +static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm, + struct vfio_group *vfio_group) +{ + struct iommu_group *grp = kvm_vfio_group_get_iommu_group(vfio_group); + + if (WARN_ON_ONCE(!grp)) + return; + + kvm_spapr_tce_release_iommu_group(kvm, grp); + iommu_group_put(grp); +} +#endif + /* * Groups can use the same or different IOMMU domains. If the same then * adding a new group may change the coherency of groups we've previously @@ -211,6 +256,9 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) mutex_unlock(&kv->lock); +#ifdef CONFIG_SPAPR_TCE_IOMMU + kvm_spapr_tce_release_vfio_group(dev->kvm, vfio_group); +#endif kvm_vfio_group_set_kvm(vfio_group, NULL); kvm_vfio_group_put_external_user(vfio_group); @@ -218,6 +266,57 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) kvm_vfio_update_coherency(dev); return ret; + +#ifdef CONFIG_SPAPR_TCE_IOMMU + case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: { + struct kvm_vfio_spapr_tce param; + struct kvm_vfio *kv = dev->private; + struct vfio_group *vfio_group; + struct kvm_vfio_group *kvg; + struct fd f; + struct iommu_group *grp; + + if (copy_from_user(¶m, (void __user *)arg, + sizeof(struct kvm_vfio_spapr_tce))) + return -EFAULT; + + f = fdget(param.groupfd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + grp = kvm_vfio_group_get_iommu_group(vfio_group); + if (WARN_ON_ONCE(!grp)) { + kvm_vfio_group_put_external_user(vfio_group); + return -EIO; + } + + ret = -ENOENT; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group != vfio_group) + continue; + + ret = kvm_spapr_tce_attach_iommu_group(dev->kvm, + param.tablefd, grp); + break; + } + + mutex_unlock(&kv->lock); + + iommu_group_put(grp); + kvm_vfio_group_put_external_user(vfio_group); + + return ret; + } +#endif /* CONFIG_SPAPR_TCE_IOMMU */ } return -ENXIO; @@ -242,6 +341,9 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_VFIO_GROUP_ADD: case KVM_DEV_VFIO_GROUP_DEL: +#ifdef CONFIG_SPAPR_TCE_IOMMU + case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: +#endif return 0; } @@ -257,6 +359,9 @@ static void kvm_vfio_destroy(struct kvm_device *dev) struct kvm_vfio_group *kvg, *tmp; list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { +#ifdef CONFIG_SPAPR_TCE_IOMMU + kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group); +#endif kvm_vfio_group_set_kvm(kvg->vfio_group, NULL); kvm_vfio_group_put_external_user(kvg->vfio_group); list_del(&kvg->node); |