From 72e59c30df449bc7fe601716e60c824b4ffe606d Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 5 Jul 2017 13:08:39 -0700 Subject: xen/pvcalls: introduce the pvcalls xenbus backend Introduce a xenbus backend for the pvcalls protocol, as defined by https://xenbits.xen.org/docs/unstable/misc/pvcalls.html. This patch only adds the stubs, the code will be added by the following patches. Signed-off-by: Stefano Stabellini Reviewed-by: Boris Ostrovsky Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 drivers/xen/pvcalls-back.c (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c new file mode 100644 index 000000000000..f3d0daad3da6 --- /dev/null +++ b/drivers/xen/pvcalls-back.c @@ -0,0 +1,61 @@ +/* + * (c) 2017 Stefano Stabellini + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static int pvcalls_back_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + return 0; +} + +static void pvcalls_back_changed(struct xenbus_device *dev, + enum xenbus_state frontend_state) +{ +} + +static int pvcalls_back_remove(struct xenbus_device *dev) +{ + return 0; +} + +static int pvcalls_back_uevent(struct xenbus_device *xdev, + struct kobj_uevent_env *env) +{ + return 0; +} + +static const struct xenbus_device_id pvcalls_back_ids[] = { + { "pvcalls" }, + { "" } +}; + +static struct xenbus_driver pvcalls_back_driver = { + .ids = pvcalls_back_ids, + .probe = pvcalls_back_probe, + .remove = pvcalls_back_remove, + .uevent = pvcalls_back_uevent, + .otherend_changed = pvcalls_back_changed, +}; -- cgit v1.2.3 From 9be07334f99e1f9e0b244d73528bc3afce126735 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 5 Jul 2017 13:08:48 -0700 Subject: xen/pvcalls: initialize the module and register the xenbus backend Keep a list of connected frontends. Use a semaphore to protect list accesses. Signed-off-by: Stefano Stabellini Reviewed-by: Boris Ostrovsky Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index f3d0daad3da6..9044cf21b31c 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -25,6 +25,11 @@ #include #include +struct pvcalls_back_global { + struct list_head frontends; + struct semaphore frontends_lock; +} pvcalls_back_global; + static int pvcalls_back_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { @@ -59,3 +64,20 @@ static struct xenbus_driver pvcalls_back_driver = { .uevent = pvcalls_back_uevent, .otherend_changed = pvcalls_back_changed, }; + +static int __init pvcalls_back_init(void) +{ + int ret; + + if (!xen_domain()) + return -ENODEV; + + ret = xenbus_register_backend(&pvcalls_back_driver); + if (ret < 0) + return ret; + + sema_init(&pvcalls_back_global.frontends_lock, 1); + INIT_LIST_HEAD(&pvcalls_back_global.frontends); + return 0; +} +module_init(pvcalls_back_init); -- cgit v1.2.3 From 0a9c75c2c7258f2c50d2c62430ccca3eec9f866f Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 10:59:17 -0700 Subject: xen/pvcalls: xenbus state handling Introduce the code to handle xenbus state changes. Implement the probe function for the pvcalls backend. Write the supported versions, max-page-order and function-calls nodes to xenstore, as required by the protocol. Introduce stub functions for disconnecting/connecting to a frontend. Signed-off-by: Stefano Stabellini Reviewed-by: Boris Ostrovsky Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 155 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 9044cf21b31c..72d2fce27a34 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -25,20 +25,175 @@ #include #include +#define PVCALLS_VERSIONS "1" +#define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER + struct pvcalls_back_global { struct list_head frontends; struct semaphore frontends_lock; } pvcalls_back_global; +static int backend_connect(struct xenbus_device *dev) +{ + return 0; +} + +static int backend_disconnect(struct xenbus_device *dev) +{ + return 0; +} + static int pvcalls_back_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { + int err, abort; + struct xenbus_transaction xbt; + +again: + abort = 1; + + err = xenbus_transaction_start(&xbt); + if (err) { + pr_warn("%s cannot create xenstore transaction\n", __func__); + return err; + } + + err = xenbus_printf(xbt, dev->nodename, "versions", "%s", + PVCALLS_VERSIONS); + if (err) { + pr_warn("%s write out 'versions' failed\n", __func__); + goto abort; + } + + err = xenbus_printf(xbt, dev->nodename, "max-page-order", "%u", + MAX_RING_ORDER); + if (err) { + pr_warn("%s write out 'max-page-order' failed\n", __func__); + goto abort; + } + + err = xenbus_printf(xbt, dev->nodename, "function-calls", + XENBUS_FUNCTIONS_CALLS); + if (err) { + pr_warn("%s write out 'function-calls' failed\n", __func__); + goto abort; + } + + abort = 0; +abort: + err = xenbus_transaction_end(xbt, abort); + if (err) { + if (err == -EAGAIN && !abort) + goto again; + pr_warn("%s cannot complete xenstore transaction\n", __func__); + return err; + } + + if (abort) + return -EFAULT; + + xenbus_switch_state(dev, XenbusStateInitWait); + return 0; } +static void set_backend_state(struct xenbus_device *dev, + enum xenbus_state state) +{ + while (dev->state != state) { + switch (dev->state) { + case XenbusStateClosed: + switch (state) { + case XenbusStateInitWait: + case XenbusStateConnected: + xenbus_switch_state(dev, XenbusStateInitWait); + break; + case XenbusStateClosing: + xenbus_switch_state(dev, XenbusStateClosing); + break; + default: + __WARN(); + } + break; + case XenbusStateInitWait: + case XenbusStateInitialised: + switch (state) { + case XenbusStateConnected: + backend_connect(dev); + xenbus_switch_state(dev, XenbusStateConnected); + break; + case XenbusStateClosing: + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosing); + break; + default: + __WARN(); + } + break; + case XenbusStateConnected: + switch (state) { + case XenbusStateInitWait: + case XenbusStateClosing: + case XenbusStateClosed: + down(&pvcalls_back_global.frontends_lock); + backend_disconnect(dev); + up(&pvcalls_back_global.frontends_lock); + xenbus_switch_state(dev, XenbusStateClosing); + break; + default: + __WARN(); + } + break; + case XenbusStateClosing: + switch (state) { + case XenbusStateInitWait: + case XenbusStateConnected: + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + break; + default: + __WARN(); + } + break; + default: + __WARN(); + } + } +} + static void pvcalls_back_changed(struct xenbus_device *dev, enum xenbus_state frontend_state) { + switch (frontend_state) { + case XenbusStateInitialising: + set_backend_state(dev, XenbusStateInitWait); + break; + + case XenbusStateInitialised: + case XenbusStateConnected: + set_backend_state(dev, XenbusStateConnected); + break; + + case XenbusStateClosing: + set_backend_state(dev, XenbusStateClosing); + break; + + case XenbusStateClosed: + set_backend_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + device_unregister(&dev->dev); + break; + case XenbusStateUnknown: + set_backend_state(dev, XenbusStateClosed); + device_unregister(&dev->dev); + break; + + default: + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", + frontend_state); + break; + } } static int pvcalls_back_remove(struct xenbus_device *dev) -- cgit v1.2.3 From d0e4d560c2433d29d11219567958b12bfe596d22 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 10:59:29 -0700 Subject: xen/pvcalls: connect to a frontend Introduce a per-frontend data structure named pvcalls_fedata. It contains pointers to the command ring, its event channel, a list of active sockets and a tree of passive sockets (passing sockets need to be looked up from the id on listen, accept and poll commands, while active sockets only on release). It also has an unbound workqueue to schedule the work of parsing and executing commands on the command ring. socket_lock protects the two lists. In pvcalls_back_global, keep a list of connected frontends. [ boris: fixed whitespaces/long lines ] Signed-off-by: Stefano Stabellini Reviewed-by: Boris Ostrovsky Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 72d2fce27a34..48b71236192d 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -33,9 +33,91 @@ struct pvcalls_back_global { struct semaphore frontends_lock; } pvcalls_back_global; +/* + * Per-frontend data structure. It contains pointers to the command + * ring, its event channel, a list of active sockets and a tree of + * passive sockets. + */ +struct pvcalls_fedata { + struct list_head list; + struct xenbus_device *dev; + struct xen_pvcalls_sring *sring; + struct xen_pvcalls_back_ring ring; + int irq; + struct list_head socket_mappings; + struct radix_tree_root socketpass_mappings; + struct semaphore socket_lock; +}; + +static irqreturn_t pvcalls_back_event(int irq, void *dev_id) +{ + return IRQ_HANDLED; +} + static int backend_connect(struct xenbus_device *dev) { + int err, evtchn; + grant_ref_t ring_ref; + struct pvcalls_fedata *fedata = NULL; + + fedata = kzalloc(sizeof(struct pvcalls_fedata), GFP_KERNEL); + if (!fedata) + return -ENOMEM; + + fedata->irq = -1; + err = xenbus_scanf(XBT_NIL, dev->otherend, "port", "%u", + &evtchn); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/event-channel", + dev->otherend); + goto error; + } + + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", "%u", &ring_ref); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/ring-ref", + dev->otherend); + goto error; + } + + err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn); + if (err < 0) + goto error; + fedata->irq = err; + + err = request_threaded_irq(fedata->irq, NULL, pvcalls_back_event, + IRQF_ONESHOT, "pvcalls-back", dev); + if (err < 0) + goto error; + + err = xenbus_map_ring_valloc(dev, &ring_ref, 1, + (void **)&fedata->sring); + if (err < 0) + goto error; + + BACK_RING_INIT(&fedata->ring, fedata->sring, XEN_PAGE_SIZE * 1); + fedata->dev = dev; + + INIT_LIST_HEAD(&fedata->socket_mappings); + INIT_RADIX_TREE(&fedata->socketpass_mappings, GFP_KERNEL); + sema_init(&fedata->socket_lock, 1); + dev_set_drvdata(&dev->dev, fedata); + + down(&pvcalls_back_global.frontends_lock); + list_add_tail(&fedata->list, &pvcalls_back_global.frontends); + up(&pvcalls_back_global.frontends_lock); + return 0; + + error: + if (fedata->irq >= 0) + unbind_from_irqhandler(fedata->irq, dev); + if (fedata->sring != NULL) + xenbus_unmap_ring_vfree(dev, fedata->sring); + kfree(fedata); + return err; } static int backend_disconnect(struct xenbus_device *dev) -- cgit v1.2.3 From b1efa69317e5e7e813620af180f262a0fc1db47c Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:00:00 -0700 Subject: xen/pvcalls: handle commands from the frontend When the other end notifies us that there are commands to be read (pvcalls_back_event), wake up the backend thread to parse the command. The command ring works like most other Xen rings, so use the usual ring macros to read and write to it. The functions implementing the commands are empty stubs for now. [ boris: fixed whitespaces ] Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 125 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 48b71236192d..a92e81d25f07 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -49,8 +49,133 @@ struct pvcalls_fedata { struct semaphore socket_lock; }; +static int pvcalls_back_socket(struct xenbus_device *dev, + struct xen_pvcalls_request *req) +{ + return 0; +} + +static int pvcalls_back_connect(struct xenbus_device *dev, + struct xen_pvcalls_request *req) +{ + return 0; +} + +static int pvcalls_back_release(struct xenbus_device *dev, + struct xen_pvcalls_request *req) +{ + return 0; +} + +static int pvcalls_back_bind(struct xenbus_device *dev, + struct xen_pvcalls_request *req) +{ + return 0; +} + +static int pvcalls_back_listen(struct xenbus_device *dev, + struct xen_pvcalls_request *req) +{ + return 0; +} + +static int pvcalls_back_accept(struct xenbus_device *dev, + struct xen_pvcalls_request *req) +{ + return 0; +} + +static int pvcalls_back_poll(struct xenbus_device *dev, + struct xen_pvcalls_request *req) +{ + return 0; +} + +static int pvcalls_back_handle_cmd(struct xenbus_device *dev, + struct xen_pvcalls_request *req) +{ + int ret = 0; + + switch (req->cmd) { + case PVCALLS_SOCKET: + ret = pvcalls_back_socket(dev, req); + break; + case PVCALLS_CONNECT: + ret = pvcalls_back_connect(dev, req); + break; + case PVCALLS_RELEASE: + ret = pvcalls_back_release(dev, req); + break; + case PVCALLS_BIND: + ret = pvcalls_back_bind(dev, req); + break; + case PVCALLS_LISTEN: + ret = pvcalls_back_listen(dev, req); + break; + case PVCALLS_ACCEPT: + ret = pvcalls_back_accept(dev, req); + break; + case PVCALLS_POLL: + ret = pvcalls_back_poll(dev, req); + break; + default: + { + struct pvcalls_fedata *fedata; + struct xen_pvcalls_response *rsp; + + fedata = dev_get_drvdata(&dev->dev); + rsp = RING_GET_RESPONSE( + &fedata->ring, fedata->ring.rsp_prod_pvt++); + rsp->req_id = req->req_id; + rsp->cmd = req->cmd; + rsp->ret = -ENOTSUPP; + break; + } + } + return ret; +} + +static void pvcalls_back_work(struct pvcalls_fedata *fedata) +{ + int notify, notify_all = 0, more = 1; + struct xen_pvcalls_request req; + struct xenbus_device *dev = fedata->dev; + + while (more) { + while (RING_HAS_UNCONSUMED_REQUESTS(&fedata->ring)) { + RING_COPY_REQUEST(&fedata->ring, + fedata->ring.req_cons++, + &req); + + if (!pvcalls_back_handle_cmd(dev, &req)) { + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY( + &fedata->ring, notify); + notify_all += notify; + } + } + + if (notify_all) { + notify_remote_via_irq(fedata->irq); + notify_all = 0; + } + + RING_FINAL_CHECK_FOR_REQUESTS(&fedata->ring, more); + } +} + static irqreturn_t pvcalls_back_event(int irq, void *dev_id) { + struct xenbus_device *dev = dev_id; + struct pvcalls_fedata *fedata = NULL; + + if (dev == NULL) + return IRQ_HANDLED; + + fedata = dev_get_drvdata(&dev->dev); + if (fedata == NULL) + return IRQ_HANDLED; + + pvcalls_back_work(fedata); return IRQ_HANDLED; } -- cgit v1.2.3 From fb0298754ab79f0aca1a8162f9aeb5b097c0a1b1 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:01:00 -0700 Subject: xen/pvcalls: implement socket command Just reply with success to the other end for now. Delay the allocation of the actual socket to bind and/or connect. Signed-off-by: Stefano Stabellini Reviewed-by: Boris Ostrovsky Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index a92e81d25f07..8f436e51053a 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -12,12 +12,17 @@ * GNU General Public License for more details. */ +#include #include #include #include #include #include #include +#include +#include +#include +#include #include #include @@ -52,6 +57,28 @@ struct pvcalls_fedata { static int pvcalls_back_socket(struct xenbus_device *dev, struct xen_pvcalls_request *req) { + struct pvcalls_fedata *fedata; + int ret; + struct xen_pvcalls_response *rsp; + + fedata = dev_get_drvdata(&dev->dev); + + if (req->u.socket.domain != AF_INET || + req->u.socket.type != SOCK_STREAM || + (req->u.socket.protocol != IPPROTO_IP && + req->u.socket.protocol != AF_INET)) + ret = -EAFNOSUPPORT; + else + ret = 0; + + /* leave the actual socket allocation for later */ + + rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++); + rsp->req_id = req->req_id; + rsp->cmd = req->cmd; + rsp->u.socket.id = req->u.socket.id; + rsp->ret = ret; + return 0; } -- cgit v1.2.3 From 5db4d286a8ef88a04247a005df9cdd4bce3c7673 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:01:06 -0700 Subject: xen/pvcalls: implement connect command Allocate a socket. Keep track of socket <-> ring mappings with a new data structure, called sock_mapping. Implement the connect command by calling inet_stream_connect, and mapping the new indexes page and data ring. Allocate a workqueue and a work_struct, called ioworker, to perform reads and writes to the socket. When an active socket is closed (sk_state_change), set in_error to -ENOTCONN and notify the other end, as specified by the protocol. sk_data_ready and pvcalls_back_ioworker will be implemented later. [ boris: fixed whitespaces ] Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 179 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 8f436e51053a..c0fc774221ca 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -54,6 +54,39 @@ struct pvcalls_fedata { struct semaphore socket_lock; }; +struct pvcalls_ioworker { + struct work_struct register_work; + struct workqueue_struct *wq; +}; + +struct sock_mapping { + struct list_head list; + struct pvcalls_fedata *fedata; + struct socket *sock; + uint64_t id; + grant_ref_t ref; + struct pvcalls_data_intf *ring; + void *bytes; + struct pvcalls_data data; + uint32_t ring_order; + int irq; + atomic_t read; + atomic_t write; + atomic_t io; + atomic_t release; + void (*saved_data_ready)(struct sock *sk); + struct pvcalls_ioworker ioworker; +}; + +static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map); +static int pvcalls_back_release_active(struct xenbus_device *dev, + struct pvcalls_fedata *fedata, + struct sock_mapping *map); + +static void pvcalls_back_ioworker(struct work_struct *work) +{ +} + static int pvcalls_back_socket(struct xenbus_device *dev, struct xen_pvcalls_request *req) { @@ -82,8 +115,149 @@ static int pvcalls_back_socket(struct xenbus_device *dev, return 0; } +static void pvcalls_sk_state_change(struct sock *sock) +{ + struct sock_mapping *map = sock->sk_user_data; + struct pvcalls_data_intf *intf; + + if (map == NULL) + return; + + intf = map->ring; + intf->in_error = -ENOTCONN; + notify_remote_via_irq(map->irq); +} + +static void pvcalls_sk_data_ready(struct sock *sock) +{ +} + +static struct sock_mapping *pvcalls_new_active_socket( + struct pvcalls_fedata *fedata, + uint64_t id, + grant_ref_t ref, + uint32_t evtchn, + struct socket *sock) +{ + int ret; + struct sock_mapping *map; + void *page; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (map == NULL) + return NULL; + + map->fedata = fedata; + map->sock = sock; + map->id = id; + map->ref = ref; + + ret = xenbus_map_ring_valloc(fedata->dev, &ref, 1, &page); + if (ret < 0) + goto out; + map->ring = page; + map->ring_order = map->ring->ring_order; + /* first read the order, then map the data ring */ + virt_rmb(); + if (map->ring_order > MAX_RING_ORDER) { + pr_warn("%s frontend requested ring_order %u, which is > MAX (%u)\n", + __func__, map->ring_order, MAX_RING_ORDER); + goto out; + } + ret = xenbus_map_ring_valloc(fedata->dev, map->ring->ref, + (1 << map->ring_order), &page); + if (ret < 0) + goto out; + map->bytes = page; + + ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id, + evtchn, + pvcalls_back_conn_event, + 0, + "pvcalls-backend", + map); + if (ret < 0) + goto out; + map->irq = ret; + + map->data.in = map->bytes; + map->data.out = map->bytes + XEN_FLEX_RING_SIZE(map->ring_order); + + map->ioworker.wq = alloc_workqueue("pvcalls_io", WQ_UNBOUND, 1); + if (!map->ioworker.wq) + goto out; + atomic_set(&map->io, 1); + INIT_WORK(&map->ioworker.register_work, pvcalls_back_ioworker); + + down(&fedata->socket_lock); + list_add_tail(&map->list, &fedata->socket_mappings); + up(&fedata->socket_lock); + + write_lock_bh(&map->sock->sk->sk_callback_lock); + map->saved_data_ready = map->sock->sk->sk_data_ready; + map->sock->sk->sk_user_data = map; + map->sock->sk->sk_data_ready = pvcalls_sk_data_ready; + map->sock->sk->sk_state_change = pvcalls_sk_state_change; + write_unlock_bh(&map->sock->sk->sk_callback_lock); + + return map; +out: + down(&fedata->socket_lock); + list_del(&map->list); + pvcalls_back_release_active(fedata->dev, fedata, map); + up(&fedata->socket_lock); + return NULL; +} + static int pvcalls_back_connect(struct xenbus_device *dev, struct xen_pvcalls_request *req) +{ + struct pvcalls_fedata *fedata; + int ret = -EINVAL; + struct socket *sock; + struct sock_mapping *map; + struct xen_pvcalls_response *rsp; + struct sockaddr *sa = (struct sockaddr *)&req->u.connect.addr; + + fedata = dev_get_drvdata(&dev->dev); + + if (req->u.connect.len < sizeof(sa->sa_family) || + req->u.connect.len > sizeof(req->u.connect.addr) || + sa->sa_family != AF_INET) + goto out; + + ret = sock_create(AF_INET, SOCK_STREAM, 0, &sock); + if (ret < 0) + goto out; + ret = inet_stream_connect(sock, sa, req->u.connect.len, 0); + if (ret < 0) { + sock_release(sock); + goto out; + } + + map = pvcalls_new_active_socket(fedata, + req->u.connect.id, + req->u.connect.ref, + req->u.connect.evtchn, + sock); + if (!map) { + ret = -EFAULT; + sock_release(map->sock); + } + +out: + rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++); + rsp->req_id = req->req_id; + rsp->cmd = req->cmd; + rsp->u.connect.id = req->u.connect.id; + rsp->ret = ret; + + return 0; +} + +static int pvcalls_back_release_active(struct xenbus_device *dev, + struct pvcalls_fedata *fedata, + struct sock_mapping *map) { return 0; } @@ -206,6 +380,11 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id) return IRQ_HANDLED; } +static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map) +{ + return IRQ_HANDLED; +} + static int backend_connect(struct xenbus_device *dev) { int err, evtchn; -- cgit v1.2.3 From 331a63e6f8e42bf20dfa5b94d717a5ef130fcbee Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:01:06 -0700 Subject: xen/pvcalls: implement bind command Allocate a socket. Track the allocated passive sockets with a new data structure named sockpass_mapping. It contains an unbound workqueue to schedule delayed work for the accept and poll commands. It also has a reqcopy field to be used to store a copy of a request for delayed work. Reads/writes to it are protected by a lock (the "copy_lock" spinlock). Initialize the workqueue in pvcalls_back_bind. Implement the bind command with inet_bind. The pass_sk_data_ready event handler will be added later. Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index c0fc774221ca..a6b8468c1341 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -78,6 +78,18 @@ struct sock_mapping { struct pvcalls_ioworker ioworker; }; +struct sockpass_mapping { + struct list_head list; + struct pvcalls_fedata *fedata; + struct socket *sock; + uint64_t id; + struct xen_pvcalls_request reqcopy; + spinlock_t copy_lock; + struct workqueue_struct *wq; + struct work_struct register_work; + void (*saved_data_ready)(struct sock *sk); +}; + static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map); static int pvcalls_back_release_active(struct xenbus_device *dev, struct pvcalls_fedata *fedata, @@ -268,9 +280,76 @@ static int pvcalls_back_release(struct xenbus_device *dev, return 0; } +static void __pvcalls_back_accept(struct work_struct *work) +{ +} + +static void pvcalls_pass_sk_data_ready(struct sock *sock) +{ +} + static int pvcalls_back_bind(struct xenbus_device *dev, struct xen_pvcalls_request *req) { + struct pvcalls_fedata *fedata; + int ret; + struct sockpass_mapping *map; + struct xen_pvcalls_response *rsp; + + fedata = dev_get_drvdata(&dev->dev); + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (map == NULL) { + ret = -ENOMEM; + goto out; + } + + INIT_WORK(&map->register_work, __pvcalls_back_accept); + spin_lock_init(&map->copy_lock); + map->wq = alloc_workqueue("pvcalls_wq", WQ_UNBOUND, 1); + if (!map->wq) { + ret = -ENOMEM; + goto out; + } + + ret = sock_create(AF_INET, SOCK_STREAM, 0, &map->sock); + if (ret < 0) + goto out; + + ret = inet_bind(map->sock, (struct sockaddr *)&req->u.bind.addr, + req->u.bind.len); + if (ret < 0) + goto out; + + map->fedata = fedata; + map->id = req->u.bind.id; + + down(&fedata->socket_lock); + ret = radix_tree_insert(&fedata->socketpass_mappings, map->id, + map); + up(&fedata->socket_lock); + if (ret) + goto out; + + write_lock_bh(&map->sock->sk->sk_callback_lock); + map->saved_data_ready = map->sock->sk->sk_data_ready; + map->sock->sk->sk_user_data = map; + map->sock->sk->sk_data_ready = pvcalls_pass_sk_data_ready; + write_unlock_bh(&map->sock->sk->sk_callback_lock); + +out: + if (ret) { + if (map && map->sock) + sock_release(map->sock); + if (map && map->wq) + destroy_workqueue(map->wq); + kfree(map); + } + rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++); + rsp->req_id = req->req_id; + rsp->cmd = req->cmd; + rsp->u.bind.id = req->u.bind.id; + rsp->ret = ret; return 0; } -- cgit v1.2.3 From 8ce3f7626f96b985142c5e93f9f2dc338b0c21bf Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:01:06 -0700 Subject: xen/pvcalls: implement listen command Call inet_listen to implement the listen command. Signed-off-by: Stefano Stabellini Reviewed-by: Boris Ostrovsky Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index a6b8468c1341..648fa75f3cc2 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -356,6 +356,27 @@ out: static int pvcalls_back_listen(struct xenbus_device *dev, struct xen_pvcalls_request *req) { + struct pvcalls_fedata *fedata; + int ret = -EINVAL; + struct sockpass_mapping *map; + struct xen_pvcalls_response *rsp; + + fedata = dev_get_drvdata(&dev->dev); + + down(&fedata->socket_lock); + map = radix_tree_lookup(&fedata->socketpass_mappings, req->u.listen.id); + up(&fedata->socket_lock); + if (map == NULL) + goto out; + + ret = inet_listen(map->sock, req->u.listen.backlog); + +out: + rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++); + rsp->req_id = req->req_id; + rsp->cmd = req->cmd; + rsp->u.listen.id = req->u.listen.id; + rsp->ret = ret; return 0; } -- cgit v1.2.3 From 6f474e711617d00ef3be31f454301da00d0eb5ac Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:01:07 -0700 Subject: xen/pvcalls: implement accept command Implement the accept command by calling inet_accept. To avoid blocking in the kernel, call inet_accept(O_NONBLOCK) from a workqueue, which get scheduled on sk_data_ready (for a passive socket, it means that there are connections to accept). Use the reqcopy field to store the request. Accept the new socket from the delayed work function, create a new sock_mapping for it, map the indexes page and data ring, and reply to the other end. Allocate an ioworker for the socket. Only support one outstanding blocking accept request for every socket at any time. Add a field to sock_mapping to remember the passive socket from which an active socket was created. [ boris: fixed whitespaces ] Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 113 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 648fa75f3cc2..9a4bdc5e5d34 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -62,6 +62,7 @@ struct pvcalls_ioworker { struct sock_mapping { struct list_head list; struct pvcalls_fedata *fedata; + struct sockpass_mapping *sockpass; struct socket *sock; uint64_t id; grant_ref_t ref; @@ -282,10 +283,83 @@ static int pvcalls_back_release(struct xenbus_device *dev, static void __pvcalls_back_accept(struct work_struct *work) { + struct sockpass_mapping *mappass = container_of( + work, struct sockpass_mapping, register_work); + struct sock_mapping *map; + struct pvcalls_ioworker *iow; + struct pvcalls_fedata *fedata; + struct socket *sock; + struct xen_pvcalls_response *rsp; + struct xen_pvcalls_request *req; + int notify; + int ret = -EINVAL; + unsigned long flags; + + fedata = mappass->fedata; + /* + * __pvcalls_back_accept can race against pvcalls_back_accept. + * We only need to check the value of "cmd" on read. It could be + * done atomically, but to simplify the code on the write side, we + * use a spinlock. + */ + spin_lock_irqsave(&mappass->copy_lock, flags); + req = &mappass->reqcopy; + if (req->cmd != PVCALLS_ACCEPT) { + spin_unlock_irqrestore(&mappass->copy_lock, flags); + return; + } + spin_unlock_irqrestore(&mappass->copy_lock, flags); + + sock = sock_alloc(); + if (sock == NULL) + goto out_error; + sock->type = mappass->sock->type; + sock->ops = mappass->sock->ops; + + ret = inet_accept(mappass->sock, sock, O_NONBLOCK, true); + if (ret == -EAGAIN) { + sock_release(sock); + goto out_error; + } + + map = pvcalls_new_active_socket(fedata, + req->u.accept.id_new, + req->u.accept.ref, + req->u.accept.evtchn, + sock); + if (!map) { + ret = -EFAULT; + sock_release(sock); + goto out_error; + } + + map->sockpass = mappass; + iow = &map->ioworker; + atomic_inc(&map->read); + atomic_inc(&map->io); + queue_work(iow->wq, &iow->register_work); + +out_error: + rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++); + rsp->req_id = req->req_id; + rsp->cmd = req->cmd; + rsp->u.accept.id = req->u.accept.id; + rsp->ret = ret; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata->ring, notify); + if (notify) + notify_remote_via_irq(fedata->irq); + + mappass->reqcopy.cmd = 0; } static void pvcalls_pass_sk_data_ready(struct sock *sock) { + struct sockpass_mapping *mappass = sock->sk_user_data; + + if (mappass == NULL) + return; + + queue_work(mappass->wq, &mappass->register_work); } static int pvcalls_back_bind(struct xenbus_device *dev, @@ -383,6 +457,45 @@ out: static int pvcalls_back_accept(struct xenbus_device *dev, struct xen_pvcalls_request *req) { + struct pvcalls_fedata *fedata; + struct sockpass_mapping *mappass; + int ret = -EINVAL; + struct xen_pvcalls_response *rsp; + unsigned long flags; + + fedata = dev_get_drvdata(&dev->dev); + + down(&fedata->socket_lock); + mappass = radix_tree_lookup(&fedata->socketpass_mappings, + req->u.accept.id); + up(&fedata->socket_lock); + if (mappass == NULL) + goto out_error; + + /* + * Limitation of the current implementation: only support one + * concurrent accept or poll call on one socket. + */ + spin_lock_irqsave(&mappass->copy_lock, flags); + if (mappass->reqcopy.cmd != 0) { + spin_unlock_irqrestore(&mappass->copy_lock, flags); + ret = -EINTR; + goto out_error; + } + + mappass->reqcopy = *req; + spin_unlock_irqrestore(&mappass->copy_lock, flags); + queue_work(mappass->wq, &mappass->register_work); + + /* Tell the caller we don't need to send back a notification yet */ + return -1; + +out_error: + rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++); + rsp->req_id = req->req_id; + rsp->cmd = req->cmd; + rsp->u.accept.id = req->u.accept.id; + rsp->ret = ret; return 0; } -- cgit v1.2.3 From 3cf33a587de48740c2da4d2ea5b414d5e493a2eb Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:01:07 -0700 Subject: xen/pvcalls: implement poll command Implement poll on passive sockets by requesting a delayed response with mappass->reqcopy, and reply back when there is data on the passive socket. Poll on active socket is unimplemented as by the spec, as the frontend should just wait for events and check the indexes on the indexes page. Only support one outstanding poll (or accept) request for every passive socket at any given time. [ boris: fixed long lines ] Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 75 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 9a4bdc5e5d34..06bd95d4d552 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -355,11 +355,34 @@ out_error: static void pvcalls_pass_sk_data_ready(struct sock *sock) { struct sockpass_mapping *mappass = sock->sk_user_data; + struct pvcalls_fedata *fedata; + struct xen_pvcalls_response *rsp; + unsigned long flags; + int notify; if (mappass == NULL) return; - queue_work(mappass->wq, &mappass->register_work); + fedata = mappass->fedata; + spin_lock_irqsave(&mappass->copy_lock, flags); + if (mappass->reqcopy.cmd == PVCALLS_POLL) { + rsp = RING_GET_RESPONSE(&fedata->ring, + fedata->ring.rsp_prod_pvt++); + rsp->req_id = mappass->reqcopy.req_id; + rsp->u.poll.id = mappass->reqcopy.u.poll.id; + rsp->cmd = mappass->reqcopy.cmd; + rsp->ret = 0; + + mappass->reqcopy.cmd = 0; + spin_unlock_irqrestore(&mappass->copy_lock, flags); + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata->ring, notify); + if (notify) + notify_remote_via_irq(mappass->fedata->irq); + } else { + spin_unlock_irqrestore(&mappass->copy_lock, flags); + queue_work(mappass->wq, &mappass->register_work); + } } static int pvcalls_back_bind(struct xenbus_device *dev, @@ -502,6 +525,56 @@ out_error: static int pvcalls_back_poll(struct xenbus_device *dev, struct xen_pvcalls_request *req) { + struct pvcalls_fedata *fedata; + struct sockpass_mapping *mappass; + struct xen_pvcalls_response *rsp; + struct inet_connection_sock *icsk; + struct request_sock_queue *queue; + unsigned long flags; + int ret; + bool data; + + fedata = dev_get_drvdata(&dev->dev); + + down(&fedata->socket_lock); + mappass = radix_tree_lookup(&fedata->socketpass_mappings, + req->u.poll.id); + up(&fedata->socket_lock); + if (mappass == NULL) + return -EINVAL; + + /* + * Limitation of the current implementation: only support one + * concurrent accept or poll call on one socket. + */ + spin_lock_irqsave(&mappass->copy_lock, flags); + if (mappass->reqcopy.cmd != 0) { + ret = -EINTR; + goto out; + } + + mappass->reqcopy = *req; + icsk = inet_csk(mappass->sock->sk); + queue = &icsk->icsk_accept_queue; + data = queue->rskq_accept_head != NULL; + if (data) { + mappass->reqcopy.cmd = 0; + ret = 0; + goto out; + } + spin_unlock_irqrestore(&mappass->copy_lock, flags); + + /* Tell the caller we don't need to send back a notification yet */ + return -1; + +out: + spin_unlock_irqrestore(&mappass->copy_lock, flags); + + rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++); + rsp->req_id = req->req_id; + rsp->cmd = req->cmd; + rsp->u.poll.id = req->u.poll.id; + rsp->ret = ret; return 0; } -- cgit v1.2.3 From a51729cb9b8079dcb520cb78a761f14fbdd0a3b4 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:01:07 -0700 Subject: xen/pvcalls: implement release command Release both active and passive sockets. For active sockets, make sure to avoid possible conflicts with the ioworker reading/writing to those sockets concurrently. Set map->release to let the ioworker know atomically that the socket will be released soon, then wait until the ioworker finishes (flush_work). Unmap indexes pages and data rings. Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 06bd95d4d552..701e8293dd28 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -272,12 +272,80 @@ static int pvcalls_back_release_active(struct xenbus_device *dev, struct pvcalls_fedata *fedata, struct sock_mapping *map) { + disable_irq(map->irq); + if (map->sock->sk != NULL) { + write_lock_bh(&map->sock->sk->sk_callback_lock); + map->sock->sk->sk_user_data = NULL; + map->sock->sk->sk_data_ready = map->saved_data_ready; + write_unlock_bh(&map->sock->sk->sk_callback_lock); + } + + atomic_set(&map->release, 1); + flush_work(&map->ioworker.register_work); + + xenbus_unmap_ring_vfree(dev, map->bytes); + xenbus_unmap_ring_vfree(dev, (void *)map->ring); + unbind_from_irqhandler(map->irq, map); + + sock_release(map->sock); + kfree(map); + + return 0; +} + +static int pvcalls_back_release_passive(struct xenbus_device *dev, + struct pvcalls_fedata *fedata, + struct sockpass_mapping *mappass) +{ + if (mappass->sock->sk != NULL) { + write_lock_bh(&mappass->sock->sk->sk_callback_lock); + mappass->sock->sk->sk_user_data = NULL; + mappass->sock->sk->sk_data_ready = mappass->saved_data_ready; + write_unlock_bh(&mappass->sock->sk->sk_callback_lock); + } + sock_release(mappass->sock); + flush_workqueue(mappass->wq); + destroy_workqueue(mappass->wq); + kfree(mappass); + return 0; } static int pvcalls_back_release(struct xenbus_device *dev, struct xen_pvcalls_request *req) { + struct pvcalls_fedata *fedata; + struct sock_mapping *map, *n; + struct sockpass_mapping *mappass; + int ret = 0; + struct xen_pvcalls_response *rsp; + + fedata = dev_get_drvdata(&dev->dev); + + down(&fedata->socket_lock); + list_for_each_entry_safe(map, n, &fedata->socket_mappings, list) { + if (map->id == req->u.release.id) { + list_del(&map->list); + up(&fedata->socket_lock); + ret = pvcalls_back_release_active(dev, fedata, map); + goto out; + } + } + mappass = radix_tree_lookup(&fedata->socketpass_mappings, + req->u.release.id); + if (mappass != NULL) { + radix_tree_delete(&fedata->socketpass_mappings, mappass->id); + up(&fedata->socket_lock); + ret = pvcalls_back_release_passive(dev, fedata, mappass); + } else + up(&fedata->socket_lock); + +out: + rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++); + rsp->req_id = req->req_id; + rsp->u.release.id = req->u.release.id; + rsp->cmd = req->cmd; + rsp->ret = ret; return 0; } -- cgit v1.2.3 From 0a85d23b8164c951a50512b84601decb9a5e4dd6 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:01:07 -0700 Subject: xen/pvcalls: disconnect and module_exit Implement backend_disconnect. Call pvcalls_back_release_active on active sockets and pvcalls_back_release_passive on passive sockets. Implement module_exit by calling backend_disconnect on frontend connections. [ boris: fixed long lines ] Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 701e8293dd28..7bdf9245cfab 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -807,6 +807,43 @@ static int backend_connect(struct xenbus_device *dev) static int backend_disconnect(struct xenbus_device *dev) { + struct pvcalls_fedata *fedata; + struct sock_mapping *map, *n; + struct sockpass_mapping *mappass; + struct radix_tree_iter iter; + void **slot; + + + fedata = dev_get_drvdata(&dev->dev); + + down(&fedata->socket_lock); + list_for_each_entry_safe(map, n, &fedata->socket_mappings, list) { + list_del(&map->list); + pvcalls_back_release_active(dev, fedata, map); + } + + radix_tree_for_each_slot(slot, &fedata->socketpass_mappings, &iter, 0) { + mappass = radix_tree_deref_slot(slot); + if (!mappass) + continue; + if (radix_tree_exception(mappass)) { + if (radix_tree_deref_retry(mappass)) + slot = radix_tree_iter_retry(&iter); + } else { + radix_tree_delete(&fedata->socketpass_mappings, + mappass->id); + pvcalls_back_release_passive(dev, fedata, mappass); + } + } + up(&fedata->socket_lock); + + unbind_from_irqhandler(fedata->irq, dev); + xenbus_unmap_ring_vfree(dev, fedata->sring); + + list_del(&fedata->list); + kfree(fedata); + dev_set_drvdata(&dev->dev, NULL); + return 0; } @@ -1003,3 +1040,19 @@ static int __init pvcalls_back_init(void) return 0; } module_init(pvcalls_back_init); + +static void __exit pvcalls_back_fin(void) +{ + struct pvcalls_fedata *fedata, *nfedata; + + down(&pvcalls_back_global.frontends_lock); + list_for_each_entry_safe(fedata, nfedata, + &pvcalls_back_global.frontends, list) { + backend_disconnect(fedata->dev); + } + up(&pvcalls_back_global.frontends_lock); + + xenbus_unregister_driver(&pvcalls_back_driver); +} + +module_exit(pvcalls_back_fin); -- cgit v1.2.3 From 5d520d8580b31d75a115ac58ab0a804b92581fd5 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:01:07 -0700 Subject: xen/pvcalls: implement the ioworker functions We have one ioworker per socket. Each ioworker goes through the list of outstanding read/write requests. Once all requests have been dealt with, it returns. We use one atomic counter per socket for "read" operations and one for "write" operations to keep track of the reads/writes to do. We also use one atomic counter ("io") per ioworker to keep track of how many outstanding requests we have in total assigned to the ioworker. The ioworker finishes when there are none. Signed-off-by: Stefano Stabellini Reviewed-by: Boris Ostrovsky Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 7bdf9245cfab..97d6fb159e8a 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -96,8 +96,34 @@ static int pvcalls_back_release_active(struct xenbus_device *dev, struct pvcalls_fedata *fedata, struct sock_mapping *map); +static void pvcalls_conn_back_read(void *opaque) +{ +} + +static void pvcalls_conn_back_write(struct sock_mapping *map) +{ +} + static void pvcalls_back_ioworker(struct work_struct *work) { + struct pvcalls_ioworker *ioworker = container_of(work, + struct pvcalls_ioworker, register_work); + struct sock_mapping *map = container_of(ioworker, struct sock_mapping, + ioworker); + + while (atomic_read(&map->io) > 0) { + if (atomic_read(&map->release) > 0) { + atomic_set(&map->release, 0); + return; + } + + if (atomic_read(&map->read) > 0) + pvcalls_conn_back_read(map); + if (atomic_read(&map->write) > 0) + pvcalls_conn_back_write(map); + + atomic_dec(&map->io); + } } static int pvcalls_back_socket(struct xenbus_device *dev, -- cgit v1.2.3 From b3f9f773af1f925b35f73f5e946bd96728b96cca Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:01:08 -0700 Subject: xen/pvcalls: implement read When an active socket has data available, increment the io and read counters, and schedule the ioworker. Implement the read function by reading from the socket, writing the data to the data ring. Set in_error on error. Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 97d6fb159e8a..2519e46bd470 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -98,6 +98,81 @@ static int pvcalls_back_release_active(struct xenbus_device *dev, static void pvcalls_conn_back_read(void *opaque) { + struct sock_mapping *map = (struct sock_mapping *)opaque; + struct msghdr msg; + struct kvec vec[2]; + RING_IDX cons, prod, size, wanted, array_size, masked_prod, masked_cons; + int32_t error; + struct pvcalls_data_intf *intf = map->ring; + struct pvcalls_data *data = &map->data; + unsigned long flags; + int ret; + + array_size = XEN_FLEX_RING_SIZE(map->ring_order); + cons = intf->in_cons; + prod = intf->in_prod; + error = intf->in_error; + /* read the indexes first, then deal with the data */ + virt_mb(); + + if (error) + return; + + size = pvcalls_queued(prod, cons, array_size); + if (size >= array_size) + return; + spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags); + if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) { + atomic_set(&map->read, 0); + spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, + flags); + return; + } + spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags); + wanted = array_size - size; + masked_prod = pvcalls_mask(prod, array_size); + masked_cons = pvcalls_mask(cons, array_size); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iter.type = ITER_KVEC|WRITE; + msg.msg_iter.count = wanted; + if (masked_prod < masked_cons) { + vec[0].iov_base = data->in + masked_prod; + vec[0].iov_len = wanted; + msg.msg_iter.kvec = vec; + msg.msg_iter.nr_segs = 1; + } else { + vec[0].iov_base = data->in + masked_prod; + vec[0].iov_len = array_size - masked_prod; + vec[1].iov_base = data->in; + vec[1].iov_len = wanted - vec[0].iov_len; + msg.msg_iter.kvec = vec; + msg.msg_iter.nr_segs = 2; + } + + atomic_set(&map->read, 0); + ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT); + WARN_ON(ret > wanted); + if (ret == -EAGAIN) /* shouldn't happen */ + return; + if (!ret) + ret = -ENOTCONN; + spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags); + if (ret > 0 && !skb_queue_empty(&map->sock->sk->sk_receive_queue)) + atomic_inc(&map->read); + spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags); + + /* write the data, then modify the indexes */ + virt_wmb(); + if (ret < 0) + intf->in_error = ret; + else + intf->in_prod = prod + ret; + /* update the indexes, then notify the other end */ + virt_wmb(); + notify_remote_via_irq(map->irq); + + return; } static void pvcalls_conn_back_write(struct sock_mapping *map) @@ -169,6 +244,16 @@ static void pvcalls_sk_state_change(struct sock *sock) static void pvcalls_sk_data_ready(struct sock *sock) { + struct sock_mapping *map = sock->sk_user_data; + struct pvcalls_ioworker *iow; + + if (map == NULL) + return; + + iow = &map->ioworker; + atomic_inc(&map->read); + atomic_inc(&map->io); + queue_work(iow->wq, &iow->register_work); } static struct sock_mapping *pvcalls_new_active_socket( -- cgit v1.2.3 From 5ad9918ffc4157f239f2e7849b4987d3b20f917e Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:01:08 -0700 Subject: xen/pvcalls: implement write When the other end notifies us that there is data to be written (pvcalls_back_conn_event), increment the io and write counters, and schedule the ioworker. Implement the write function called by ioworker by reading the data from the data ring, writing it to the socket by calling inet_sendmsg. Set out_error on error. Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 2519e46bd470..3201a0c5dbcf 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -177,6 +177,64 @@ static void pvcalls_conn_back_read(void *opaque) static void pvcalls_conn_back_write(struct sock_mapping *map) { + struct pvcalls_data_intf *intf = map->ring; + struct pvcalls_data *data = &map->data; + struct msghdr msg; + struct kvec vec[2]; + RING_IDX cons, prod, size, array_size; + int ret; + + cons = intf->out_cons; + prod = intf->out_prod; + /* read the indexes before dealing with the data */ + virt_mb(); + + array_size = XEN_FLEX_RING_SIZE(map->ring_order); + size = pvcalls_queued(prod, cons, array_size); + if (size == 0) + return; + + memset(&msg, 0, sizeof(msg)); + msg.msg_flags |= MSG_DONTWAIT; + msg.msg_iter.type = ITER_KVEC|READ; + msg.msg_iter.count = size; + if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) { + vec[0].iov_base = data->out + pvcalls_mask(cons, array_size); + vec[0].iov_len = size; + msg.msg_iter.kvec = vec; + msg.msg_iter.nr_segs = 1; + } else { + vec[0].iov_base = data->out + pvcalls_mask(cons, array_size); + vec[0].iov_len = array_size - pvcalls_mask(cons, array_size); + vec[1].iov_base = data->out; + vec[1].iov_len = size - vec[0].iov_len; + msg.msg_iter.kvec = vec; + msg.msg_iter.nr_segs = 2; + } + + atomic_set(&map->write, 0); + ret = inet_sendmsg(map->sock, &msg, size); + if (ret == -EAGAIN || (ret >= 0 && ret < size)) { + atomic_inc(&map->write); + atomic_inc(&map->io); + } + if (ret == -EAGAIN) + return; + + /* write the data, then update the indexes */ + virt_wmb(); + if (ret < 0) { + intf->out_error = ret; + } else { + intf->out_error = 0; + intf->out_cons = cons + ret; + prod = intf->out_prod; + } + /* update the indexes, then notify the other end */ + virt_wmb(); + if (prod != cons + ret) + atomic_inc(&map->write); + notify_remote_via_irq(map->irq); } static void pvcalls_back_ioworker(struct work_struct *work) @@ -847,6 +905,19 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id) static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map) { + struct sock_mapping *map = sock_map; + struct pvcalls_ioworker *iow; + + if (map == NULL || map->sock == NULL || map->sock->sk == NULL || + map->sock->sk->sk_user_data != map) + return IRQ_HANDLED; + + iow = &map->ioworker; + + atomic_inc(&map->write); + atomic_inc(&map->io); + queue_work(iow->wq, &iow->register_work); + return IRQ_HANDLED; } -- cgit v1.2.3 From 42d3078a8ad7542eee980da08a781a769bb21fe4 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 6 Jul 2017 11:01:08 -0700 Subject: xen: introduce a Kconfig option to enable the pvcalls backend Also add pvcalls-back to the Makefile. Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- drivers/xen/Kconfig | 12 ++++++++++++ drivers/xen/Makefile | 1 + 2 files changed, 13 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index f15bb3b789d5..4545561954ee 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -196,6 +196,18 @@ config XEN_PCIDEV_BACKEND If in doubt, say m. +config XEN_PVCALLS_BACKEND + bool "XEN PV Calls backend driver" + depends on INET && XEN && XEN_BACKEND + default n + help + Experimental backend for the Xen PV Calls protocol + (https://xenbits.xen.org/docs/unstable/misc/pvcalls.html). It + allows PV Calls frontends to send POSIX calls to the backend, + which implements them. + + If in doubt, say n. + config XEN_SCSI_BACKEND tristate "XEN SCSI backend driver" depends on XEN && XEN_BACKEND && TARGET_CORE diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 8feab810aed9..480b92887e57 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o obj-$(CONFIG_XEN_EFI) += efi.o obj-$(CONFIG_XEN_SCSI_BACKEND) += xen-scsiback.o obj-$(CONFIG_XEN_AUTO_XLATE) += xlate_mmu.o +obj-$(CONFIG_XEN_PVCALLS_BACKEND) += pvcalls-back.o xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o xen-gntalloc-y := gntalloc.o -- cgit v1.2.3 From fff219d90c8220f9e8a254d792537503cfb93017 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 2 Aug 2017 23:16:57 +0530 Subject: xen-platform: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/platform-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/xen') diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 1275df83070f..5d7dcad0b0a0 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -175,7 +175,7 @@ pci_out: return ret; } -static struct pci_device_id platform_pci_tbl[] = { +static const struct pci_device_id platform_pci_tbl[] = { {PCI_VENDOR_ID_XEN, PCI_DEVICE_ID_XEN_PLATFORM, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {0,} -- cgit v1.2.3 From fefcfb9935be0208ff522563239f11a04f98c27d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 21 Jul 2017 18:17:45 +0200 Subject: xen/pvcalls: use WARN_ON(1) instead of __WARN() __WARN() is an internal helper that is only available on some architectures, but causes a build error e.g. on ARM64 in some configurations: drivers/xen/pvcalls-back.c: In function 'set_backend_state': drivers/xen/pvcalls-back.c:1097:5: error: implicit declaration of function '__WARN' [-Werror=implicit-function-declaration] Unfortunately, there is no equivalent of BUG() that takes no arguments, but WARN_ON(1) is commonly used in other drivers and works on all configurations. Fixes: 7160378206b2 ("xen/pvcalls: xenbus state handling") Signed-off-by: Arnd Bergmann Reviewed-by: Stefano Stabellini Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-back.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 3201a0c5dbcf..b209cd44bb8d 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -1098,7 +1098,7 @@ static void set_backend_state(struct xenbus_device *dev, xenbus_switch_state(dev, XenbusStateClosing); break; default: - __WARN(); + WARN_ON(1); } break; case XenbusStateInitWait: @@ -1113,7 +1113,7 @@ static void set_backend_state(struct xenbus_device *dev, xenbus_switch_state(dev, XenbusStateClosing); break; default: - __WARN(); + WARN_ON(1); } break; case XenbusStateConnected: @@ -1127,7 +1127,7 @@ static void set_backend_state(struct xenbus_device *dev, xenbus_switch_state(dev, XenbusStateClosing); break; default: - __WARN(); + WARN_ON(1); } break; case XenbusStateClosing: @@ -1138,11 +1138,11 @@ static void set_backend_state(struct xenbus_device *dev, xenbus_switch_state(dev, XenbusStateClosed); break; default: - __WARN(); + WARN_ON(1); } break; default: - __WARN(); + WARN_ON(1); } } } -- cgit v1.2.3 From 22f12f0df802cea865672d8f39fbebdc03981050 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Thu, 17 Aug 2017 17:14:52 +0100 Subject: xen/events: events_fifo: Don't use {get,put}_cpu() in xen_evtchn_fifo_init() When booting Linux as Xen guest with CONFIG_DEBUG_ATOMIC, the following splat appears: [ 0.002323] Mountpoint-cache hash table entries: 1024 (order: 1, 8192 bytes) [ 0.019717] ASID allocator initialised with 65536 entries [ 0.020019] xen:grant_table: Grant tables using version 1 layout [ 0.020051] Grant table initialized [ 0.020069] BUG: sleeping function called from invalid context at /data/src/linux/mm/page_alloc.c:4046 [ 0.020100] in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: swapper/0 [ 0.020123] no locks held by swapper/0/1. [ 0.020143] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.13.0-rc5 #598 [ 0.020166] Hardware name: FVP Base (DT) [ 0.020182] Call trace: [ 0.020199] [] dump_backtrace+0x0/0x270 [ 0.020222] [] show_stack+0x24/0x30 [ 0.020244] [] dump_stack+0xb8/0xf0 [ 0.020267] [] ___might_sleep+0x1c8/0x1f8 [ 0.020291] [] __might_sleep+0x58/0x90 [ 0.020313] [] __alloc_pages_nodemask+0x1c0/0x12e8 [ 0.020338] [] alloc_page_interleave+0x38/0x88 [ 0.020363] [] alloc_pages_current+0xdc/0xf0 [ 0.020387] [] __get_free_pages+0x28/0x50 [ 0.020411] [] evtchn_fifo_alloc_control_block+0x2c/0xa0 [ 0.020437] [] xen_evtchn_fifo_init+0x38/0xb4 [ 0.020461] [] xen_init_IRQ+0x44/0xc8 [ 0.020484] [] xen_guest_init+0x250/0x300 [ 0.020507] [] do_one_initcall+0x44/0x130 [ 0.020531] [] kernel_init_freeable+0x120/0x288 [ 0.020556] [] kernel_init+0x18/0x110 [ 0.020578] [] ret_from_fork+0x10/0x40 [ 0.020606] xen:events: Using FIFO-based ABI [ 0.020658] Xen: initializing cpu0 [ 0.027727] Hierarchical SRCU implementation. [ 0.036235] EFI services will not be available. [ 0.043810] smp: Bringing up secondary CPUs ... This is because get_cpu() in xen_evtchn_fifo_init() will disable preemption, but __get_free_page() might sleep (GFP_ATOMIC is not set). xen_evtchn_fifo_init() will always be called before SMP is initialized, so {get,put}_cpu() could be replaced by a simple smp_processor_id(). This also avoid to modify evtchn_fifo_alloc_control_block that will be called in other context. Signed-off-by: Julien Grall Reported-by: Andre Przywara Reviewed-by: Boris Ostrovsky Fixes: 1fe565517b57 ("xen/events: use the FIFO-based ABI if available") Signed-off-by: Boris Ostrovsky --- drivers/xen/events/events_fifo.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 3c41470c7fc4..76b318e88382 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -432,12 +432,12 @@ static int xen_evtchn_cpu_dead(unsigned int cpu) int __init xen_evtchn_fifo_init(void) { - int cpu = get_cpu(); + int cpu = smp_processor_id(); int ret; ret = evtchn_fifo_alloc_control_block(cpu); if (ret < 0) - goto out; + return ret; pr_info("Using FIFO-based ABI\n"); @@ -446,7 +446,6 @@ int __init xen_evtchn_fifo_init(void) cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, "xen/evtchn:prepare", xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); -out: - put_cpu(); + return ret; } -- cgit v1.2.3 From b194da25ca4812f6307204f9166ec623fd08d5d0 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Wed, 23 Aug 2017 18:27:04 -0400 Subject: xen: Don't try to call xen_alloc_p2m_entry() on autotranslating guests Commit aba831a69632 ("xen: remove tests for pvh mode in pure pv paths") removed XENFEAT_auto_translated_physmap test in xen_alloc_p2m_entry() since it is assumed that the routine is never called by non-PV guests. However, alloc_xenballooned_pages() may make this call on a PVH guest. Prevent this from happening by adding XENFEAT_auto_translated_physmap check there. Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross Fixes: aba831a69632 ("xen: remove tests for pvh mode in pure pv paths") --- drivers/xen/balloon.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index ab609255a0f3..f77e499afddd 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -664,9 +664,11 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages) */ BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); - ret = xen_alloc_p2m_entry(page_to_pfn(page)); - if (ret < 0) - goto out_undo; + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + ret = xen_alloc_p2m_entry(page_to_pfn(page)); + if (ret < 0) + goto out_undo; + } #endif } else { ret = add_ballooned_pages(nr_pages - pgno); -- cgit v1.2.3