summaryrefslogtreecommitdiffstats
path: root/drivers/vfio/virqfd.c
blob: d22881245e89146891d236eed9351f49dec4f200 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO generic eventfd code for IRQFD support.
 * Derived from drivers/vfio/pci/vfio_pci_intrs.c
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/vfio.h>
#include <linux/eventfd.h>
#include <linux/file.h>
#include <linux/module.h>
#include <linux/slab.h>
#include "vfio.h"

static struct workqueue_struct *vfio_irqfd_cleanup_wq;
static DEFINE_SPINLOCK(virqfd_lock);

int __init vfio_virqfd_init(void)
{
	vfio_irqfd_cleanup_wq =
		create_singlethread_workqueue("vfio-irqfd-cleanup");
	if (!vfio_irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void vfio_virqfd_exit(void)
{
	destroy_workqueue(vfio_irqfd_cleanup_wq);
}

static void virqfd_deactivate(struct virqfd *virqfd)
{
	queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
}

static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
	__poll_t flags = key_to_poll(key);

	if (flags & EPOLLIN) {
		u64 cnt;
		eventfd_ctx_do_read(virqfd->eventfd, &cnt);

		/* An event has been signaled, call function */
		if ((!virqfd->handler ||
		     virqfd->handler(virqfd->opaque, virqfd->data)) &&
		    virqfd->thread)
			schedule_work(&virqfd->inject);
	}

	if (flags & EPOLLHUP) {
		unsigned long flags;
		spin_lock_irqsave(&virqfd_lock, flags);

		/*
		 * The eventfd is closing, if the virqfd has not yet been
		 * queued for release, as determined by testing whether the
		 * virqfd pointer to it is still valid, queue it now.  As
		 * with kvm irqfds, we know we won't race against the virqfd
		 * going away because we hold the lock to get here.
		 */
		if (*(virqfd->pvirqfd) == virqfd) {
			*(virqfd->pvirqfd) = NULL;
			virqfd_deactivate(virqfd);
		}

		spin_unlock_irqrestore(&virqfd_lock, flags);
	}

	return 0;
}

static void virqfd_ptable_queue_proc(struct file *file,
				     wait_queue_head_t *wqh, poll_table *pt)
{
	struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
	add_wait_queue(wqh, &virqfd->wait);
}

static void virqfd_shutdown(struct work_struct *work)
{
	struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
	u64 cnt;

	eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
	flush_work(&virqfd->inject);
	eventfd_ctx_put(virqfd->eventfd);

	kfree(virqfd);
}

static void virqfd_inject(struct work_struct *work)
{
	struct virqfd *virqfd = container_of(work, struct virqfd, inject);
	if (virqfd->thread)
		virqfd->thread(virqfd->opaque, virqfd->data);
}

static void virqfd_flush_inject(struct work_struct *work)
{
	struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject);

	flush_work(&virqfd->inject);
}

int vfio_virqfd_enable(void *opaque,
		       int (*handler)(void *, void *),
		       void (*thread)(void *, void *),
		       void *data, struct virqfd **pvirqfd, int fd)
{
	struct fd irqfd;
	struct eventfd_ctx *ctx;
	struct virqfd *virqfd;
	int ret = 0;
	__poll_t events;

	virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL_ACCOUNT);
	if (!virqfd)
		return -ENOMEM;

	virqfd->pvirqfd = pvirqfd;
	virqfd->opaque = opaque;
	virqfd->handler = handler;
	virqfd->thread = thread;
	virqfd->data = data;

	INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
	INIT_WORK(&virqfd->inject, virqfd_inject);
	INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject);

	irqfd = fdget(fd);
	if (!fd_file(irqfd)) {
		ret = -EBADF;
		goto err_fd;
	}

	ctx = eventfd_ctx_fileget(fd_file(irqfd));
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		goto err_ctx;
	}

	virqfd->eventfd = ctx;

	/*
	 * virqfds can be released by closing the eventfd or directly
	 * through ioctl.  These are both done through a workqueue, so
	 * we update the pointer to the virqfd under lock to avoid
	 * pushing multiple jobs to release the same virqfd.
	 */
	spin_lock_irq(&virqfd_lock);

	if (*pvirqfd) {
		spin_unlock_irq(&virqfd_lock);
		ret = -EBUSY;
		goto err_busy;
	}
	*pvirqfd = virqfd;

	spin_unlock_irq(&virqfd_lock);

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd.
	 */
	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);

	events = vfs_poll(fd_file(irqfd), &virqfd->pt);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered and trigger it as if we didn't miss it.
	 */
	if (events & EPOLLIN) {
		if ((!handler || handler(opaque, data)) && thread)
			schedule_work(&virqfd->inject);
	}

	/*
	 * Do not drop the file until the irqfd is fully initialized,
	 * otherwise we might race against the EPOLLHUP.
	 */
	fdput(irqfd);

	return 0;
err_busy:
	eventfd_ctx_put(ctx);
err_ctx:
	fdput(irqfd);
err_fd:
	kfree(virqfd);

	return ret;
}
EXPORT_SYMBOL_GPL(vfio_virqfd_enable);

void vfio_virqfd_disable(struct virqfd **pvirqfd)
{
	unsigned long flags;

	spin_lock_irqsave(&virqfd_lock, flags);

	if (*pvirqfd) {
		virqfd_deactivate(*pvirqfd);
		*pvirqfd = NULL;
	}

	spin_unlock_irqrestore(&virqfd_lock, flags);

	/*
	 * Block until we know all outstanding shutdown jobs have completed.
	 * Even if we don't queue the job, flush the wq to be sure it's
	 * been released.
	 */
	flush_workqueue(vfio_irqfd_cleanup_wq);
}
EXPORT_SYMBOL_GPL(vfio_virqfd_disable);

void vfio_virqfd_flush_thread(struct virqfd **pvirqfd)
{
	unsigned long flags;

	spin_lock_irqsave(&virqfd_lock, flags);
	if (*pvirqfd && (*pvirqfd)->thread)
		queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject);
	spin_unlock_irqrestore(&virqfd_lock, flags);

	flush_workqueue(vfio_irqfd_cleanup_wq);
}
EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread);