summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-07-28 20:17:30 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2023-07-28 20:17:30 +0200
commit81eef8909d171bdca6af37028a11a24e011ed312 (patch)
treedaf29a77666fa7ba1876cf3cba5104002e96ab85
parentMerge tag 'ceph-for-6.5-rc4' of https://github.com/ceph/ceph-client (diff)
parentxen: speed up grant-table reclaim (diff)
downloadlinux-81eef8909d171bdca6af37028a11a24e011ed312.tar.xz
linux-81eef8909d171bdca6af37028a11a24e011ed312.zip
Merge tag 'for-linus-6.5a-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen fixes from Juergen Gross: - A fix for a performance problem in QubesOS, adding a way to drain the queue of grants experiencing delayed unmaps faster - A patch enabling the use of static event channels from user mode, which was omitted when introducing supporting static event channels - A fix for a problem where Xen related code didn't check properly for running in a Xen environment, resulting in a WARN splat * tag 'for-linus-6.5a-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: xen: speed up grant-table reclaim xen/evtchn: Introduce new IOCTL to bind static evtchn xenbus: check xen_domain in xenbus_probe_initcall
-rw-r--r--Documentation/ABI/testing/sysfs-module11
-rw-r--r--drivers/xen/events/events_base.c16
-rw-r--r--drivers/xen/evtchn.c35
-rw-r--r--drivers/xen/grant-table.c40
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c3
-rw-r--r--include/uapi/xen/evtchn.h9
-rw-r--r--include/xen/events.h11
7 files changed, 93 insertions, 32 deletions
diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module
index 08886367d047..62addab47d0c 100644
--- a/Documentation/ABI/testing/sysfs-module
+++ b/Documentation/ABI/testing/sysfs-module
@@ -60,3 +60,14 @@ Description: Module taint flags:
C staging driver module
E unsigned module
== =====================
+
+What: /sys/module/grant_table/parameters/free_per_iteration
+Date: July 2023
+KernelVersion: 6.5 but backported to all supported stable branches
+Contact: Xen developer discussion <xen-devel@lists.xenproject.org>
+Description: Read and write number of grant entries to attempt to free per iteration.
+
+ Note: Future versions of Xen and Linux may provide a better
+ interface for controlling the rate of deferred grant reclaim
+ or may not need it at all.
+Users: Qubes OS (https://www.qubes-os.org)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index c7715f8bd452..3bdd5b59661d 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -112,6 +112,7 @@ struct irq_info {
unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
u64 eoi_time; /* Time in jiffies when to EOI. */
raw_spinlock_t lock;
+ bool is_static; /* Is event channel static */
union {
unsigned short virq;
@@ -815,15 +816,6 @@ static void xen_free_irq(unsigned irq)
irq_free_desc(irq);
}
-static void xen_evtchn_close(evtchn_port_t port)
-{
- struct evtchn_close close;
-
- close.port = port;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
-}
-
/* Not called for lateeoi events. */
static void event_handler_exit(struct irq_info *info)
{
@@ -982,7 +974,8 @@ static void __unbind_from_irq(unsigned int irq)
unsigned int cpu = cpu_from_irq(irq);
struct xenbus_device *dev;
- xen_evtchn_close(evtchn);
+ if (!info->is_static)
+ xen_evtchn_close(evtchn);
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
@@ -1574,7 +1567,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority)
}
EXPORT_SYMBOL_GPL(xen_set_irq_priority);
-int evtchn_make_refcounted(evtchn_port_t evtchn)
+int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
{
int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info;
@@ -1590,6 +1583,7 @@ int evtchn_make_refcounted(evtchn_port_t evtchn)
WARN_ON(info->refcnt != -1);
info->refcnt = 1;
+ info->is_static = is_static;
return 0;
}
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index c99415a70051..9139a7364df5 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -366,10 +366,10 @@ static int evtchn_resize_ring(struct per_user_data *u)
return 0;
}
-static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
+static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port,
+ bool is_static)
{
struct user_evtchn *evtchn;
- struct evtchn_close close;
int rc = 0;
/*
@@ -402,14 +402,14 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
if (rc < 0)
goto err;
- rc = evtchn_make_refcounted(port);
+ rc = evtchn_make_refcounted(port, is_static);
return rc;
err:
/* bind failed, should close the port now */
- close.port = port;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
+ if (!is_static)
+ xen_evtchn_close(port);
+
del_evtchn(u, evtchn);
return rc;
}
@@ -456,7 +456,7 @@ static long evtchn_ioctl(struct file *file,
if (rc != 0)
break;
- rc = evtchn_bind_to_user(u, bind_virq.port);
+ rc = evtchn_bind_to_user(u, bind_virq.port, false);
if (rc == 0)
rc = bind_virq.port;
break;
@@ -482,7 +482,7 @@ static long evtchn_ioctl(struct file *file,
if (rc != 0)
break;
- rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
+ rc = evtchn_bind_to_user(u, bind_interdomain.local_port, false);
if (rc == 0)
rc = bind_interdomain.local_port;
break;
@@ -507,7 +507,7 @@ static long evtchn_ioctl(struct file *file,
if (rc != 0)
break;
- rc = evtchn_bind_to_user(u, alloc_unbound.port);
+ rc = evtchn_bind_to_user(u, alloc_unbound.port, false);
if (rc == 0)
rc = alloc_unbound.port;
break;
@@ -536,6 +536,23 @@ static long evtchn_ioctl(struct file *file,
break;
}
+ case IOCTL_EVTCHN_BIND_STATIC: {
+ struct ioctl_evtchn_bind bind;
+ struct user_evtchn *evtchn;
+
+ rc = -EFAULT;
+ if (copy_from_user(&bind, uarg, sizeof(bind)))
+ break;
+
+ rc = -EISCONN;
+ evtchn = find_evtchn(u, bind.port);
+ if (evtchn)
+ break;
+
+ rc = evtchn_bind_to_user(u, bind.port, true);
+ break;
+ }
+
case IOCTL_EVTCHN_NOTIFY: {
struct ioctl_evtchn_notify notify;
struct user_evtchn *evtchn;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index e1ec725c2819..f13c3b76ad1e 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -498,14 +498,21 @@ static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(struct timer_list *);
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred);
+static atomic64_t deferred_count;
+static atomic64_t leaked_count;
+static unsigned int free_per_iteration = 10;
+module_param(free_per_iteration, uint, 0600);
+
static void gnttab_handle_deferred(struct timer_list *unused)
{
- unsigned int nr = 10;
+ unsigned int nr = READ_ONCE(free_per_iteration);
+ const bool ignore_limit = nr == 0;
struct deferred_entry *first = NULL;
unsigned long flags;
+ size_t freed = 0;
spin_lock_irqsave(&gnttab_list_lock, flags);
- while (nr--) {
+ while ((ignore_limit || nr--) && !list_empty(&deferred_list)) {
struct deferred_entry *entry
= list_first_entry(&deferred_list,
struct deferred_entry, list);
@@ -515,10 +522,14 @@ static void gnttab_handle_deferred(struct timer_list *unused)
list_del(&entry->list);
spin_unlock_irqrestore(&gnttab_list_lock, flags);
if (_gnttab_end_foreign_access_ref(entry->ref)) {
+ uint64_t ret = atomic64_dec_return(&deferred_count);
+
put_free_entry(entry->ref);
- pr_debug("freeing g.e. %#x (pfn %#lx)\n",
- entry->ref, page_to_pfn(entry->page));
+ pr_debug("freeing g.e. %#x (pfn %#lx), %llu remaining\n",
+ entry->ref, page_to_pfn(entry->page),
+ (unsigned long long)ret);
put_page(entry->page);
+ freed++;
kfree(entry);
entry = NULL;
} else {
@@ -530,21 +541,22 @@ static void gnttab_handle_deferred(struct timer_list *unused)
spin_lock_irqsave(&gnttab_list_lock, flags);
if (entry)
list_add_tail(&entry->list, &deferred_list);
- else if (list_empty(&deferred_list))
- break;
}
- if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
+ if (list_empty(&deferred_list))
+ WARN_ON(atomic64_read(&deferred_count));
+ else if (!timer_pending(&deferred_timer)) {
deferred_timer.expires = jiffies + HZ;
add_timer(&deferred_timer);
}
spin_unlock_irqrestore(&gnttab_list_lock, flags);
+ pr_debug("Freed %zu references", freed);
}
static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
{
struct deferred_entry *entry;
gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
- const char *what = KERN_WARNING "leaking";
+ uint64_t leaked, deferred;
entry = kmalloc(sizeof(*entry), gfp);
if (!page) {
@@ -567,10 +579,16 @@ static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
add_timer(&deferred_timer);
}
spin_unlock_irqrestore(&gnttab_list_lock, flags);
- what = KERN_DEBUG "deferring";
+ deferred = atomic64_inc_return(&deferred_count);
+ leaked = atomic64_read(&leaked_count);
+ pr_debug("deferring g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
+ ref, page ? page_to_pfn(page) : -1, deferred, leaked);
+ } else {
+ deferred = atomic64_read(&deferred_count);
+ leaked = atomic64_inc_return(&leaked_count);
+ pr_warn("leaking g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
+ ref, page ? page_to_pfn(page) : -1, deferred, leaked);
}
- printk("%s g.e. %#x (pfn %#lx)\n",
- what, ref, page ? page_to_pfn(page) : -1);
}
int gnttab_try_end_foreign_access(grant_ref_t ref)
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 58b732dcbfb8..639bf628389b 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -811,6 +811,9 @@ static int xenbus_probe_thread(void *unused)
static int __init xenbus_probe_initcall(void)
{
+ if (!xen_domain())
+ return -ENODEV;
+
/*
* Probe XenBus here in the XS_PV case, and also XS_HVM unless we
* need to wait for the platform PCI device to come up or
diff --git a/include/uapi/xen/evtchn.h b/include/uapi/xen/evtchn.h
index 7fbf732f168f..aef2b75f3413 100644
--- a/include/uapi/xen/evtchn.h
+++ b/include/uapi/xen/evtchn.h
@@ -101,4 +101,13 @@ struct ioctl_evtchn_restrict_domid {
domid_t domid;
};
+/*
+ * Bind statically allocated @port.
+ */
+#define IOCTL_EVTCHN_BIND_STATIC \
+ _IOC(_IOC_NONE, 'E', 7, sizeof(struct ioctl_evtchn_bind))
+struct ioctl_evtchn_bind {
+ unsigned int port;
+};
+
#endif /* __LINUX_PUBLIC_EVTCHN_H__ */
diff --git a/include/xen/events.h b/include/xen/events.h
index ac1281c5ead6..95970a2f7695 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -69,7 +69,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority);
/*
* Allow extra references to event channels exposed to userspace by evtchn
*/
-int evtchn_make_refcounted(evtchn_port_t evtchn);
+int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static);
int evtchn_get(evtchn_port_t evtchn);
void evtchn_put(evtchn_port_t evtchn);
@@ -141,4 +141,13 @@ void xen_init_IRQ(void);
irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
+static inline void xen_evtchn_close(evtchn_port_t port)
+{
+ struct evtchn_close close;
+
+ close.port = port;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+ BUG();
+}
+
#endif /* _XEN_EVENTS_H */