diff options
Diffstat (limited to 'net')
57 files changed, 975 insertions, 434 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 9ee5787634e5..953b6728bd00 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -626,11 +626,18 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev, { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; netdev_features_t old_features = features; + netdev_features_t lower_features; - features = netdev_intersect_features(features, real_dev->vlan_features); - features |= NETIF_F_RXCSUM; - features = netdev_intersect_features(features, real_dev->features); + lower_features = netdev_intersect_features((real_dev->vlan_features | + NETIF_F_RXCSUM), + real_dev->features); + /* Add HW_CSUM setting to preserve user ability to control + * checksum offload on the vlan device. + */ + if (lower_features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) + lower_features |= NETIF_F_HW_CSUM; + features = netdev_intersect_features(features, lower_features); features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE); features |= NETIF_F_LLTX; diff --git a/net/9p/Kconfig b/net/9p/Kconfig index a75174a33723..e6014e0e51f7 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -22,6 +22,15 @@ config NET_9P_VIRTIO This builds support for a transports between guest partitions and a host partition. +config NET_9P_XEN + depends on XEN + select XEN_XENBUS_FRONTEND + tristate "9P Xen Transport" + help + This builds support for a transport for 9pfs between + two Xen domains. + + config NET_9P_RDMA depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS tristate "9P RDMA Transport (Experimental)" diff --git a/net/9p/Makefile b/net/9p/Makefile index a0874cc1f718..697ea7caf466 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_NET_9P) := 9pnet.o +obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o @@ -14,5 +15,8 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o 9pnet_virtio-objs := \ trans_virtio.o \ +9pnet_xen-objs := \ + trans_xen.o \ + 9pnet_rdma-objs := \ trans_rdma.o \ diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c new file mode 100644 index 000000000000..71e85643b3f9 --- /dev/null +++ b/net/9p/trans_xen.c @@ -0,0 +1,545 @@ +/* + * linux/fs/9p/trans_xen + * + * Xen transport layer. + * + * Copyright (C) 2017 by Stefano Stabellini <stefano@aporeto.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <xen/events.h> +#include <xen/grant_table.h> +#include <xen/xen.h> +#include <xen/xenbus.h> +#include <xen/interface/io/9pfs.h> + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/rwlock.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include <net/9p/transport.h> + +#define XEN_9PFS_NUM_RINGS 2 +#define XEN_9PFS_RING_ORDER 6 +#define XEN_9PFS_RING_SIZE XEN_FLEX_RING_SIZE(XEN_9PFS_RING_ORDER) + +struct xen_9pfs_header { + uint32_t size; + uint8_t id; + uint16_t tag; + + /* uint8_t sdata[]; */ +} __attribute__((packed)); + +/* One per ring, more than one per 9pfs share */ +struct xen_9pfs_dataring { + struct xen_9pfs_front_priv *priv; + + struct xen_9pfs_data_intf *intf; + grant_ref_t ref; + int evtchn; + int irq; + /* protect a ring from concurrent accesses */ + spinlock_t lock; + + struct xen_9pfs_data data; + wait_queue_head_t wq; + struct work_struct work; +}; + +/* One per 9pfs share */ +struct xen_9pfs_front_priv { + struct list_head list; + struct xenbus_device *dev; + char *tag; + struct p9_client *client; + + int num_rings; + struct xen_9pfs_dataring *rings; +}; + +static LIST_HEAD(xen_9pfs_devs); +static DEFINE_RWLOCK(xen_9pfs_lock); + +/* We don't currently allow canceling of requests */ +static int p9_xen_cancel(struct p9_client *client, struct p9_req_t *req) +{ + return 1; +} + +static int p9_xen_create(struct p9_client *client, const char *addr, char *args) +{ + struct xen_9pfs_front_priv *priv; + + read_lock(&xen_9pfs_lock); + list_for_each_entry(priv, &xen_9pfs_devs, list) { + if (!strcmp(priv->tag, addr)) { + priv->client = client; + read_unlock(&xen_9pfs_lock); + return 0; + } + } + read_unlock(&xen_9pfs_lock); + return -EINVAL; +} + +static void p9_xen_close(struct p9_client *client) +{ + struct xen_9pfs_front_priv *priv; + + read_lock(&xen_9pfs_lock); + list_for_each_entry(priv, &xen_9pfs_devs, list) { + if (priv->client == client) { + priv->client = NULL; + read_unlock(&xen_9pfs_lock); + return; + } + } + read_unlock(&xen_9pfs_lock); +} + +static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size) +{ + RING_IDX cons, prod; + + cons = ring->intf->out_cons; + prod = ring->intf->out_prod; + virt_mb(); + + return XEN_9PFS_RING_SIZE - + xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) >= size; +} + +static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) +{ + struct xen_9pfs_front_priv *priv = NULL; + RING_IDX cons, prod, masked_cons, masked_prod; + unsigned long flags; + u32 size = p9_req->tc->size; + struct xen_9pfs_dataring *ring; + int num; + + read_lock(&xen_9pfs_lock); + list_for_each_entry(priv, &xen_9pfs_devs, list) { + if (priv->client == client) + break; + } + read_unlock(&xen_9pfs_lock); + if (!priv || priv->client != client) + return -EINVAL; + + num = p9_req->tc->tag % priv->num_rings; + ring = &priv->rings[num]; + +again: + while (wait_event_interruptible(ring->wq, + p9_xen_write_todo(ring, size)) != 0) + ; + + spin_lock_irqsave(&ring->lock, flags); + cons = ring->intf->out_cons; + prod = ring->intf->out_prod; + virt_mb(); + + if (XEN_9PFS_RING_SIZE - xen_9pfs_queued(prod, cons, + XEN_9PFS_RING_SIZE) < size) { + spin_unlock_irqrestore(&ring->lock, flags); + goto again; + } + + masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE); + masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE); + + xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size, + &masked_prod, masked_cons, XEN_9PFS_RING_SIZE); + + p9_req->status = REQ_STATUS_SENT; + virt_wmb(); /* write ring before updating pointer */ + prod += size; + ring->intf->out_prod = prod; + spin_unlock_irqrestore(&ring->lock, flags); + notify_remote_via_irq(ring->irq); + + return 0; +} + +static void p9_xen_response(struct work_struct *work) +{ + struct xen_9pfs_front_priv *priv; + struct xen_9pfs_dataring *ring; + RING_IDX cons, prod, masked_cons, masked_prod; + struct xen_9pfs_header h; + struct p9_req_t *req; + int status; + + ring = container_of(work, struct xen_9pfs_dataring, work); + priv = ring->priv; + + while (1) { + cons = ring->intf->in_cons; + prod = ring->intf->in_prod; + virt_rmb(); + + if (xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) < + sizeof(h)) { + notify_remote_via_irq(ring->irq); + return; + } + + masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE); + masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE); + + /* First, read just the header */ + xen_9pfs_read_packet(&h, ring->data.in, sizeof(h), + masked_prod, &masked_cons, + XEN_9PFS_RING_SIZE); + + req = p9_tag_lookup(priv->client, h.tag); + if (!req || req->status != REQ_STATUS_SENT) { + dev_warn(&priv->dev->dev, "Wrong req tag=%x\n", h.tag); + cons += h.size; + virt_mb(); + ring->intf->in_cons = cons; + continue; + } + + memcpy(req->rc, &h, sizeof(h)); + req->rc->offset = 0; + + masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE); + /* Then, read the whole packet (including the header) */ + xen_9pfs_read_packet(req->rc->sdata, ring->data.in, h.size, + masked_prod, &masked_cons, + XEN_9PFS_RING_SIZE); + + virt_mb(); + cons += h.size; + ring->intf->in_cons = cons; + + status = (req->status != REQ_STATUS_ERROR) ? + REQ_STATUS_RCVD : REQ_STATUS_ERROR; + + p9_client_cb(priv->client, req, status); + } +} + +static irqreturn_t xen_9pfs_front_event_handler(int irq, void *r) +{ + struct xen_9pfs_dataring *ring = r; + + if (!ring || !ring->priv->client) { + /* ignore spurious interrupt */ + return IRQ_HANDLED; + } + + wake_up_interruptible(&ring->wq); + schedule_work(&ring->work); + + return IRQ_HANDLED; +} + +static struct p9_trans_module p9_xen_trans = { + .name = "xen", + .maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT), + .def = 1, + .create = p9_xen_create, + .close = p9_xen_close, + .request = p9_xen_request, + .cancel = p9_xen_cancel, + .owner = THIS_MODULE, +}; + +static const struct xenbus_device_id xen_9pfs_front_ids[] = { + { "9pfs" }, + { "" } +}; + +static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) +{ + int i, j; + + write_lock(&xen_9pfs_lock); + list_del(&priv->list); + write_unlock(&xen_9pfs_lock); + + for (i = 0; i < priv->num_rings; i++) { + if (!priv->rings[i].intf) + break; + if (priv->rings[i].irq > 0) + unbind_from_irqhandler(priv->rings[i].irq, priv->dev); + if (priv->rings[i].data.in) { + for (j = 0; j < (1 << XEN_9PFS_RING_ORDER); j++) { + grant_ref_t ref; + + ref = priv->rings[i].intf->ref[j]; + gnttab_end_foreign_access(ref, 0, 0); + } + free_pages((unsigned long)priv->rings[i].data.in, + XEN_9PFS_RING_ORDER - + (PAGE_SHIFT - XEN_PAGE_SHIFT)); + } + gnttab_end_foreign_access(priv->rings[i].ref, 0, 0); + free_page((unsigned long)priv->rings[i].intf); + } + kfree(priv->rings); + kfree(priv->tag); + kfree(priv); +} + +static int xen_9pfs_front_remove(struct xenbus_device *dev) +{ + struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev); + + dev_set_drvdata(&dev->dev, NULL); + xen_9pfs_front_free(priv); + return 0; +} + +static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, + struct xen_9pfs_dataring *ring) +{ + int i = 0; + int ret = -ENOMEM; + void *bytes = NULL; + + init_waitqueue_head(&ring->wq); + spin_lock_init(&ring->lock); + INIT_WORK(&ring->work, p9_xen_response); + + ring->intf = (struct xen_9pfs_data_intf *)get_zeroed_page(GFP_KERNEL); + if (!ring->intf) + return ret; + ret = gnttab_grant_foreign_access(dev->otherend_id, + virt_to_gfn(ring->intf), 0); + if (ret < 0) + goto out; + ring->ref = ret; + bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + XEN_9PFS_RING_ORDER - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + if (!bytes) { + ret = -ENOMEM; + goto out; + } + for (; i < (1 << XEN_9PFS_RING_ORDER); i++) { + ret = gnttab_grant_foreign_access( + dev->otherend_id, virt_to_gfn(bytes) + i, 0); + if (ret < 0) + goto out; + ring->intf->ref[i] = ret; + } + ring->intf->ring_order = XEN_9PFS_RING_ORDER; + ring->data.in = bytes; + ring->data.out = bytes + XEN_9PFS_RING_SIZE; + + ret = xenbus_alloc_evtchn(dev, &ring->evtchn); + if (ret) + goto out; + ring->irq = bind_evtchn_to_irqhandler(ring->evtchn, + xen_9pfs_front_event_handler, + 0, "xen_9pfs-frontend", ring); + if (ring->irq >= 0) + return 0; + + xenbus_free_evtchn(dev, ring->evtchn); + ret = ring->irq; +out: + if (bytes) { + for (i--; i >= 0; i--) + gnttab_end_foreign_access(ring->intf->ref[i], 0, 0); + free_pages((unsigned long)bytes, + XEN_9PFS_RING_ORDER - + (PAGE_SHIFT - XEN_PAGE_SHIFT)); + } + gnttab_end_foreign_access(ring->ref, 0, 0); + free_page((unsigned long)ring->intf); + return ret; +} + +static int xen_9pfs_front_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int ret, i; + struct xenbus_transaction xbt; + struct xen_9pfs_front_priv *priv = NULL; + char *versions; + unsigned int max_rings, max_ring_order, len = 0; + + versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len); + if (!len) + return -EINVAL; + if (strcmp(versions, "1")) { + kfree(versions); + return -EINVAL; + } + kfree(versions); + max_rings = xenbus_read_unsigned(dev->otherend, "max-rings", 0); + if (max_rings < XEN_9PFS_NUM_RINGS) + return -EINVAL; + max_ring_order = xenbus_read_unsigned(dev->otherend, + "max-ring-page-order", 0); + if (max_ring_order < XEN_9PFS_RING_ORDER) + return -EINVAL; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->dev = dev; + priv->num_rings = XEN_9PFS_NUM_RINGS; + priv->rings = kcalloc(priv->num_rings, sizeof(*priv->rings), + GFP_KERNEL); + if (!priv->rings) { + kfree(priv); + return -ENOMEM; + } + + for (i = 0; i < priv->num_rings; i++) { + priv->rings[i].priv = priv; + ret = xen_9pfs_front_alloc_dataring(dev, &priv->rings[i]); + if (ret < 0) + goto error; + } + + again: + ret = xenbus_transaction_start(&xbt); + if (ret) { + xenbus_dev_fatal(dev, ret, "starting transaction"); + goto error; + } + ret = xenbus_printf(xbt, dev->nodename, "version", "%u", 1); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "num-rings", "%u", + priv->num_rings); + if (ret) + goto error_xenbus; + for (i = 0; i < priv->num_rings; i++) { + char str[16]; + + BUILD_BUG_ON(XEN_9PFS_NUM_RINGS > 9); + sprintf(str, "ring-ref%u", i); + ret = xenbus_printf(xbt, dev->nodename, str, "%d", + priv->rings[i].ref); + if (ret) + goto error_xenbus; + + sprintf(str, "event-channel-%u", i); + ret = xenbus_printf(xbt, dev->nodename, str, "%u", + priv->rings[i].evtchn); + if (ret) + goto error_xenbus; + } + priv->tag = xenbus_read(xbt, dev->nodename, "tag", NULL); + if (!priv->tag) { + ret = -EINVAL; + goto error_xenbus; + } + ret = xenbus_transaction_end(xbt, 0); + if (ret) { + if (ret == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, ret, "completing transaction"); + goto error; + } + + write_lock(&xen_9pfs_lock); + list_add_tail(&priv->list, &xen_9pfs_devs); + write_unlock(&xen_9pfs_lock); + dev_set_drvdata(&dev->dev, priv); + xenbus_switch_state(dev, XenbusStateInitialised); + + return 0; + + error_xenbus: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, ret, "writing xenstore"); + error: + dev_set_drvdata(&dev->dev, NULL); + xen_9pfs_front_free(priv); + return ret; +} + +static int xen_9pfs_front_resume(struct xenbus_device *dev) +{ + dev_warn(&dev->dev, "suspsend/resume unsupported\n"); + return 0; +} + +static void xen_9pfs_front_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + switch (backend_state) { + case XenbusStateReconfiguring: + case XenbusStateReconfigured: + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateUnknown: + break; + + case XenbusStateInitWait: + break; + + case XenbusStateConnected: + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateClosed: + if (dev->state == XenbusStateClosed) + break; + /* Missed the backend's CLOSING state -- fallthrough */ + case XenbusStateClosing: + xenbus_frontend_closed(dev); + break; + } +} + +static struct xenbus_driver xen_9pfs_front_driver = { + .ids = xen_9pfs_front_ids, + .probe = xen_9pfs_front_probe, + .remove = xen_9pfs_front_remove, + .resume = xen_9pfs_front_resume, + .otherend_changed = xen_9pfs_front_changed, +}; + +int p9_trans_xen_init(void) +{ + if (!xen_domain()) + return -ENODEV; + + pr_info("Initialising Xen transport for 9pfs\n"); + + v9fs_register_trans(&p9_xen_trans); + return xenbus_register_frontend(&xen_9pfs_front_driver); +} +module_init(p9_trans_xen_init); + +void p9_trans_xen_exit(void) +{ + v9fs_unregister_trans(&p9_xen_trans); + return xenbus_unregister_driver(&xen_9pfs_front_driver); +} +module_exit(p9_trans_xen_exit); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index a572db710d4e..c5ce7745b230 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -133,6 +133,8 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_MCAST_TO_UCAST */ + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + + nla_total_size(1) /* IFLA_BRPORT_MCAST_FLOOD */ + + nla_total_size(1) /* IFLA_BRPORT_BCAST_FLOOD */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */ @@ -633,6 +635,8 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 }, [IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NLA_U8 }, [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 }, + [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 108533859a53..4eb773ccce11 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -187,7 +187,7 @@ void *ceph_kvmalloc(size_t size, gfp_t flags) return ptr; } - return __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size, flags, PAGE_KERNEL); } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f76bb3332613..5766a6c896c4 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1386,8 +1386,9 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { - struct timespec now = CURRENT_TIME; + struct timespec now; + ktime_get_real_ts(&now); con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); ceph_encode_timespec(&con->out_temp_keepalive2, &now); con_out_kvec_add(con, sizeof(con->out_temp_keepalive2), @@ -3176,8 +3177,9 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con, { if (interval > 0 && (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) { - struct timespec now = CURRENT_TIME; + struct timespec now; struct timespec ts; + ktime_get_real_ts(&now); jiffies_to_timespec(interval, &ts); ts = timespec_add(con->last_keepalive_ack, ts); return timespec_compare(&now, &ts) >= 0; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e15ea9e4c495..242d7c0d92f8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -3574,7 +3574,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, ceph_oid_copy(&lreq->t.base_oid, oid); ceph_oloc_copy(&lreq->t.base_oloc, oloc); lreq->t.flags = CEPH_OSD_FLAG_WRITE; - lreq->mtime = CURRENT_TIME; + ktime_get_real_ts(&lreq->mtime); lreq->reg_req = alloc_linger_request(lreq); if (!lreq->reg_req) { @@ -3632,7 +3632,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); req->r_flags = CEPH_OSD_FLAG_WRITE; - req->r_mtime = CURRENT_TIME; + ktime_get_real_ts(&req->r_mtime); osd_req_op_watch_init(req, 0, lreq->linger_id, CEPH_OSD_WATCH_OP_UNWATCH); diff --git a/net/core/dev.c b/net/core/dev.c index d07aa5ffb511..fca407b4a6ea 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -81,6 +81,7 @@ #include <linux/hash.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/mutex.h> #include <linux/string.h> #include <linux/mm.h> @@ -4235,7 +4236,7 @@ static int __netif_receive_skb(struct sk_buff *skb) int ret; if (sk_memalloc_socks() && skb_pfmemalloc(skb)) { - unsigned long pflags = current->flags; + unsigned int noreclaim_flag; /* * PFMEMALLOC skbs are special, they should @@ -4246,9 +4247,9 @@ static int __netif_receive_skb(struct sk_buff *skb) * Use PF_MEMALLOC as this saves us from propagating the allocation * context down to all allocation sites. */ - current->flags |= PF_MEMALLOC; + noreclaim_flag = memalloc_noreclaim_save(); ret = __netif_receive_skb_core(skb, true); - current_restore_flags(pflags, PF_MEMALLOC); + memalloc_noreclaim_restore(noreclaim_flag); } else ret = __netif_receive_skb_core(skb, false); @@ -6851,6 +6852,32 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down); +bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op) +{ + struct netdev_xdp xdp; + + memset(&xdp, 0, sizeof(xdp)); + xdp.command = XDP_QUERY_PROG; + + /* Query must always succeed. */ + WARN_ON(xdp_op(dev, &xdp) < 0); + return xdp.prog_attached; +} + +static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, + struct netlink_ext_ack *extack, + struct bpf_prog *prog) +{ + struct netdev_xdp xdp; + + memset(&xdp, 0, sizeof(xdp)); + xdp.command = XDP_SETUP_PROG; + xdp.extack = extack; + xdp.prog = prog; + + return xdp_op(dev, &xdp); +} + /** * dev_change_xdp_fd - set or clear a bpf program for a device rx path * @dev: device @@ -6863,41 +6890,34 @@ EXPORT_SYMBOL(dev_change_proto_down); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags) { - int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp); const struct net_device_ops *ops = dev->netdev_ops; struct bpf_prog *prog = NULL; - struct netdev_xdp xdp; + xdp_op_t xdp_op, xdp_chk; int err; ASSERT_RTNL(); - xdp_op = ops->ndo_xdp; + xdp_op = xdp_chk = ops->ndo_xdp; + if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE)) + return -EOPNOTSUPP; if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) xdp_op = generic_xdp_install; + if (xdp_op == xdp_chk) + xdp_chk = generic_xdp_install; if (fd >= 0) { - if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) { - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG; - - err = xdp_op(dev, &xdp); - if (err < 0) - return err; - if (xdp.prog_attached) - return -EBUSY; - } + if (xdp_chk && __dev_xdp_attached(dev, xdp_chk)) + return -EEXIST; + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && + __dev_xdp_attached(dev, xdp_op)) + return -EBUSY; prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); if (IS_ERR(prog)) return PTR_ERR(prog); } - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_SETUP_PROG; - xdp.extack = extack; - xdp.prog = prog; - - err = xdp_op(dev, &xdp); + err = dev_xdp_install(dev, xdp_op, extack, prog); if (err < 0 && prog) bpf_prog_put(prog); @@ -7264,12 +7284,10 @@ static int netif_alloc_rx_queues(struct net_device *dev) BUG_ON(count < 1); - rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); - if (!rx) { - rx = vzalloc(sz); - if (!rx) - return -ENOMEM; - } + rx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT); + if (!rx) + return -ENOMEM; + dev->_rx = rx; for (i = 0; i < count; i++) @@ -7306,12 +7324,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev) if (count < 1 || count > 0xffff) return -EINVAL; - tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); - if (!tx) { - tx = vzalloc(sz); - if (!tx) - return -ENOMEM; - } + tx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT); + if (!tx) + return -ENOMEM; + dev->_tx = tx; netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); @@ -7845,9 +7861,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); - if (!p) - p = vzalloc(alloc_size); + p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_REPEAT); if (!p) return NULL; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6e67315ec368..d7f82c3450b1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -899,8 +899,7 @@ static size_t rtnl_port_size(const struct net_device *dev, static size_t rtnl_xdp_size(void) { size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ - nla_total_size(1) + /* XDP_ATTACHED */ - nla_total_size(4); /* XDP_FLAGS */ + nla_total_size(1); /* XDP_ATTACHED */ return xdp_size; } @@ -1054,7 +1053,7 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) return err; } - if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name)) + if (nla_put_string(skb, IFLA_PHYS_PORT_NAME, name)) return -EMSGSIZE; return 0; @@ -1247,37 +1246,34 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) return 0; } +static u8 rtnl_xdp_attached_mode(struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + ASSERT_RTNL(); + + if (rcu_access_pointer(dev->xdp_prog)) + return XDP_ATTACHED_SKB; + if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp)) + return XDP_ATTACHED_DRV; + + return XDP_ATTACHED_NONE; +} + static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) { struct nlattr *xdp; - u32 xdp_flags = 0; - u8 val = 0; int err; xdp = nla_nest_start(skb, IFLA_XDP); if (!xdp) return -EMSGSIZE; - if (rcu_access_pointer(dev->xdp_prog)) { - xdp_flags = XDP_FLAGS_SKB_MODE; - val = 1; - } else if (dev->netdev_ops->ndo_xdp) { - struct netdev_xdp xdp_op = {}; - - xdp_op.command = XDP_QUERY_PROG; - err = dev->netdev_ops->ndo_xdp(dev, &xdp_op); - if (err) - goto err_cancel; - val = xdp_op.prog_attached; - } - err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val); + + err = nla_put_u8(skb, IFLA_XDP_ATTACHED, + rtnl_xdp_attached_mode(dev)); if (err) goto err_cancel; - if (xdp_flags) { - err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags); - if (err) - goto err_cancel; - } nla_nest_end(skb, xdp); return 0; @@ -2199,6 +2195,11 @@ static int do_setlink(const struct sk_buff *skb, err = -EINVAL; goto errout; } + if ((xdp_flags & XDP_FLAGS_SKB_MODE) && + (xdp_flags & XDP_FLAGS_DRV_MODE)) { + err = -EINVAL; + goto errout; + } } if (xdp[IFLA_XDP_FD]) { diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 6bd2f8fb0476..ae35cce3a40d 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -24,9 +24,13 @@ static siphash_key_t ts_secret __read_mostly; static __always_inline void net_secret_init(void) { - net_get_random_once(&ts_secret, sizeof(ts_secret)); net_get_random_once(&net_secret, sizeof(net_secret)); } + +static __always_inline void ts_secret_init(void) +{ + net_get_random_once(&ts_secret, sizeof(ts_secret)); +} #endif #ifdef CONFIG_INET @@ -47,7 +51,7 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) -static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) +u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) { const struct { struct in6_addr saddr; @@ -60,12 +64,14 @@ static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) if (sysctl_tcp_timestamps != 1) return 0; + ts_secret_init(); return siphash(&combined, offsetofend(typeof(combined), daddr), &ts_secret); } +EXPORT_SYMBOL(secure_tcpv6_ts_off); -u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport, u32 *tsoff) +u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport) { const struct { struct in6_addr saddr; @@ -78,14 +84,14 @@ u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr, .sport = sport, .dport = dport }; - u64 hash; + u32 hash; + net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); - *tsoff = secure_tcpv6_ts_off(saddr, daddr); return seq_scale(hash); } -EXPORT_SYMBOL(secure_tcpv6_seq_and_tsoff); +EXPORT_SYMBOL(secure_tcpv6_seq); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) @@ -107,11 +113,12 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) +u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) { if (sysctl_tcp_timestamps != 1) return 0; + ts_secret_init(); return siphash_2u32((__force u32)saddr, (__force u32)daddr, &ts_secret); } @@ -121,15 +128,15 @@ static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) * it would be easy enough to have the former function use siphash_4u32, passing * the arguments as separate u32. */ -u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport, u32 *tsoff) +u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) { - u64 hash; + u32 hash; + net_secret_init(); hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); - *tsoff = secure_tcp_ts_off(saddr, daddr); return seq_scale(hash); } diff --git a/net/core/sock.c b/net/core/sock.c index b5baeb9cb0fb..e43e71d7856b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -102,6 +102,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> @@ -372,14 +373,14 @@ EXPORT_SYMBOL_GPL(sk_clear_memalloc); int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { int ret; - unsigned long pflags = current->flags; + unsigned int noreclaim_flag; /* these should have been dropped before queueing */ BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); - current->flags |= PF_MEMALLOC; + noreclaim_flag = memalloc_noreclaim_save(); ret = sk->sk_backlog_rcv(sk, skb); - current_restore_flags(pflags, PF_MEMALLOC); + memalloc_noreclaim_restore(noreclaim_flag); return ret; } @@ -1802,28 +1803,24 @@ EXPORT_SYMBOL(skb_set_owner_w); * delay queue. We want to allow the owner socket to send more * packets, as if they were already TX completed by a typical driver. * But we also want to keep skb->sk set because some packet schedulers - * rely on it (sch_fq for example). So we set skb->truesize to a small - * amount (1) and decrease sk_wmem_alloc accordingly. + * rely on it (sch_fq for example). */ void skb_orphan_partial(struct sk_buff *skb) { - /* If this skb is a TCP pure ACK or already went here, - * we have nothing to do. 2 is already a very small truesize. - */ - if (skb->truesize <= 2) + if (skb_is_tcp_pure_ack(skb)) return; - /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, - * so we do not completely orphan skb, but transfert all - * accounted bytes but one, to avoid unexpected reorders. - */ if (skb->destructor == sock_wfree #ifdef CONFIG_INET || skb->destructor == tcp_wfree #endif ) { - atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc); - skb->truesize = 1; + struct sock *sk = skb->sk; + + if (atomic_inc_not_zero(&sk->sk_refcnt)) { + atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + skb->destructor = sock_efree; + } } else { skb_orphan(skb); } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d9b6a4e403e7..b6bbb71e713e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -426,6 +426,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, newsk->sk_backlog_rcv = dccp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; @@ -490,6 +493,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 9afa2a5030b2..405483a07efc 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2361,7 +2361,8 @@ MODULE_AUTHOR("Linux DECnet Project Team"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_DECnet); -static char banner[] __initdata = KERN_INFO "NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n"; +static const char banner[] __initconst = KERN_INFO +"NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n"; static int __init decnet_init(void) { diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 482730cd8a56..eeb5fc561f80 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -110,7 +110,7 @@ struct neigh_table dn_neigh_table = { static int dn_neigh_construct(struct neighbour *neigh) { struct net_device *dev = neigh->dev; - struct dn_neigh *dn = (struct dn_neigh *)neigh; + struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n); struct dn_dev *dn_db; struct neigh_parms *parms; @@ -339,7 +339,7 @@ int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *) dst; struct neighbour *neigh = rt->n; - struct dn_neigh *dn = (struct dn_neigh *)neigh; + struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n); struct dn_dev *dn_db; bool use_long; @@ -391,7 +391,7 @@ int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb) neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1); - dn = (struct dn_neigh *)neigh; + dn = container_of(neigh, struct dn_neigh, n); if (neigh) { write_lock(&neigh->lock); @@ -451,7 +451,7 @@ int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1); - dn = (struct dn_neigh *)neigh; + dn = container_of(neigh, struct dn_neigh, n); if (neigh) { write_lock(&neigh->lock); @@ -510,7 +510,7 @@ static void neigh_elist_cb(struct neighbour *neigh, void *_info) if (neigh->dev != s->dev) return; - dn = (struct dn_neigh *) neigh; + dn = container_of(neigh, struct dn_neigh, n); if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2))) return; @@ -549,7 +549,7 @@ int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n) static inline void dn_neigh_format_entry(struct seq_file *seq, struct neighbour *n) { - struct dn_neigh *dn = (struct dn_neigh *) n; + struct dn_neigh *dn = container_of(n, struct dn_neigh, n); char buf[DN_ASCBUF_LEN]; read_lock(&n->lock); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5e313c1ac94f..1054d330bf9d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -794,6 +794,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, /* listeners have SOCK_RCU_FREE, not the children */ sock_reset_flag(newsk, SOCK_RCU_FREE); + inet_sk(newsk)->mc_list = NULL; + newsk->sk_mark = inet_rsk(req)->ir_mark; atomic64_set(&newsk->sk_cookie, atomic64_read(&inet_rsk(req)->ir_cookie)); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8bea74298173..e9a59d2d91d4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -678,11 +678,7 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) /* no more locks than number of hash buckets */ nblocks = min(nblocks, hashinfo->ehash_mask + 1); - hashinfo->ehash_locks = kmalloc_array(nblocks, locksz, - GFP_KERNEL | __GFP_NOWARN); - if (!hashinfo->ehash_locks) - hashinfo->ehash_locks = vmalloc(nblocks * locksz); - + hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL); if (!hashinfo->ehash_locks) return -ENOMEM; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 40977413fd48..4ec9affb2252 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -546,12 +546,13 @@ static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm *p = &t->parms; - nla_put_u32(skb, IFLA_VTI_LINK, p->link); - nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key); - nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key); - nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr); - nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr); - nla_put_u32(skb, IFLA_VTI_FWMARK, t->fwmark); + if (nla_put_u32(skb, IFLA_VTI_LINK, p->link) || + nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key) || + nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key) || + nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr) || + nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr) || + nla_put_u32(skb, IFLA_VTI_FWMARK, t->fwmark)) + return -EMSGSIZE; return 0; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9d943974de2b..bdffad875691 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -358,6 +358,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, rt->dst.dev->mtu); return -EMSGSIZE; } + if (length < sizeof(struct iphdr)) + return -EINVAL; + if (flags&MSG_PROBE) goto out; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 496b97e17aaf..0257d965f111 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -16,6 +16,7 @@ #include <linux/siphash.h> #include <linux/kernel.h> #include <linux/export.h> +#include <net/secure_seq.h> #include <net/tcp.h> #include <net/route.h> @@ -203,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check); struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) + struct dst_entry *dst, u32 tsoff) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; @@ -213,6 +214,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, NULL, &own_req); if (child) { atomic_set(&req->rsk_refcnt, 1); + tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); inet_csk_reqsk_queue_add(sk, req, child); } else { @@ -292,6 +294,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct rtable *rt; __u8 rcv_wscale; struct flowi4 fl4; + u32 tsoff = 0; if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; @@ -311,6 +314,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, 0, NULL); + if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { + tsoff = secure_tcp_ts_off(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); + tcp_opt.rcv_tsecr -= tsoff; + } + if (!cookie_timestamp_decode(&tcp_opt)) goto out; @@ -381,7 +389,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst); - ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst); + ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff); /* ip_queue_xmit() depends on our flow being setup * Normal sockets get it right from inet_csk_route_child_sock() */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9739962bfb3f..06e2dbc2b4a2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -85,7 +85,6 @@ int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); -EXPORT_SYMBOL(sysctl_tcp_timestamps); /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; @@ -1180,13 +1179,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, */ if (pkt_len > mss) { unsigned int new_len = (pkt_len / mss) * mss; - if (!in_sack && new_len < pkt_len) { + if (!in_sack && new_len < pkt_len) new_len += mss; - if (new_len >= skb->len) - return 0; - } pkt_len = new_len; } + + if (pkt_len >= skb->len && !in_sack) + return 0; + err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC); if (err < 0) return err; @@ -6347,8 +6347,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; - if (isn && tmp_opt.tstamp_ok) - af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off); + if (tmp_opt.tstamp_ok) + tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb); if (!want_cookie && !isn) { /* Kill the following clause, if you dislike this way. */ @@ -6368,7 +6368,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_release; } - isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off); + isn = af_ops->init_seq(skb); } if (!dst) { dst = af_ops->route_req(sk, &fl, req); @@ -6380,7 +6380,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (want_cookie) { isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); - tcp_rsk(req)->ts_off = 0; req->cookie_ts = tmp_opt.tstamp_ok; if (!tmp_opt.tstamp_ok) inet_rsk(req)->ecn_ok = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cbbafe546c0f..3a51582bef55 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -94,12 +94,18 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static u32 tcp_v4_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff) +static u32 tcp_v4_init_seq(const struct sk_buff *skb) { - return secure_tcp_seq_and_tsoff(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source, tsoff); + return secure_tcp_seq(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); +} + +static u32 tcp_v4_init_ts_off(const struct sk_buff *skb) +{ + return secure_tcp_ts_off(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -145,7 +151,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct flowi4 *fl4; struct rtable *rt; int err; - u32 seq; struct ip_options_rcu *inet_opt; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; @@ -232,13 +237,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = NULL; if (likely(!tp->repair)) { - seq = secure_tcp_seq_and_tsoff(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port, - &tp->tsoffset); if (!tp->write_seq) - tp->write_seq = seq; + tp->write_seq = secure_tcp_seq(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port); + tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr, + inet->inet_daddr); } inet->inet_id = tp->write_seq ^ jiffies; @@ -1239,7 +1244,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .cookie_init_seq = cookie_v4_init_sequence, #endif .route_req = tcp_v4_route_req, - .init_seq_tsoff = tcp_v4_init_seq_and_tsoff, + .init_seq = tcp_v4_init_seq, + .init_ts_off = tcp_v4_init_ts_off, .send_synack = tcp_v4_send_synack, }; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 9d0d4f39e42b..653bbd67e3a3 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1011,10 +1011,7 @@ static int __net_init tcp_net_metrics_init(struct net *net) tcp_metrics_hash_log = order_base_2(slots); size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log; - tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!tcp_metrics_hash) - tcp_metrics_hash = vzalloc(size); - + tcp_metrics_hash = kvzalloc(size, GFP_KERNEL); if (!tcp_metrics_hash) return -ENOMEM; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 8f6373b0cd77..717be4de5324 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -523,6 +523,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; tcp_ecn_openreq_child(newtp, req); + newtp->fastopen_req = NULL; newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; newtp->rack.mstamp.v64 = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 60111a0fc201..4858e190f6ac 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1514,6 +1514,7 @@ static void tcp_cwnd_application_limited(struct sock *sk) static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) { + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; struct tcp_sock *tp = tcp_sk(sk); /* Track the maximum number of outstanding packets in each @@ -1536,7 +1537,8 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) tp->snd_cwnd_used = tp->packets_out; if (sysctl_tcp_slow_start_after_idle && - (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto) + (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && + !ca_ops->cong_control) tcp_cwnd_application_limited(sk); /* The following conditions together indicate the starvation diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a2a370b71249..8d297a79b568 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3548,6 +3548,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, */ static struct notifier_block ipv6_dev_notf = { .notifier_call = addrconf_notify, + .priority = ADDRCONF_NOTIFY_PRIORITY, }; static void addrconf_type_change(struct net_device *dev, unsigned long event) @@ -6573,6 +6574,8 @@ int __init addrconf_init(void) goto errlo; } + ip6_route_init_special_entries(); + for (i = 0; i < IN6_ADDR_HSIZE; i++) INIT_HLIST_HEAD(&inet6_addr_lst[i]); diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index af8f52ee7180..2fd5ca151dcf 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -41,13 +41,7 @@ static int alloc_ila_locks(struct ila_net *ilan) size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU); if (sizeof(spinlock_t) != 0) { -#ifdef CONFIG_NUMA - if (size * sizeof(spinlock_t) > PAGE_SIZE) - ilan->locks = vmalloc(size * sizeof(spinlock_t)); - else -#endif - ilan->locks = kmalloc_array(size, sizeof(spinlock_t), - GFP_KERNEL); + ilan->locks = kvmalloc(size * sizeof(spinlock_t), GFP_KERNEL); if (!ilan->locks) return -ENOMEM; for (i = 0; i < size; i++) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0da6a12b5472..1f992d9e261d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -632,6 +632,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); return -EMSGSIZE; } + if (length < sizeof(struct ipv6hdr)) + return -EINVAL; if (flags&MSG_PROBE) goto out; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a1bf426c959b..dc61b0b5e64e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3709,7 +3709,10 @@ static int ip6_route_dev_notify(struct notifier_block *this, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); - if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { + if (!(dev->flags & IFF_LOOPBACK)) + return NOTIFY_OK; + + if (event == NETDEV_REGISTER) { net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -3718,6 +3721,12 @@ static int ip6_route_dev_notify(struct notifier_block *this, net->ipv6.ip6_blk_hole_entry->dst.dev = dev; net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); #endif + } else if (event == NETDEV_UNREGISTER) { + in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev); + in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev); +#endif } return NOTIFY_OK; @@ -4024,9 +4033,24 @@ static struct pernet_operations ip6_route_net_late_ops = { static struct notifier_block ip6_route_dev_notifier = { .notifier_call = ip6_route_dev_notify, - .priority = 0, + .priority = ADDRCONF_NOTIFY_PRIORITY - 10, }; +void __init ip6_route_init_special_entries(void) +{ + /* Registering of the loopback is done before this portion of code, + * the loopback reference in rt6_info will not be taken, do it + * manually for init_net */ + init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES + init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #endif +} + int __init ip6_route_init(void) { int ret; @@ -4053,17 +4077,6 @@ int __init ip6_route_init(void) ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; - /* Registering of the loopback is done before this portion of code, - * the loopback reference in rt6_info will not be taken, do it - * manually for init_net */ - init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; - init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - #ifdef CONFIG_IPV6_MULTIPLE_TABLES - init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; - init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; - init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - #endif ret = fib6_init(); if (ret) goto out_register_subsys; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 895ff650db43..5abc3692b901 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -18,6 +18,7 @@ #include <linux/random.h> #include <linux/siphash.h> #include <linux/kernel.h> +#include <net/secure_seq.h> #include <net/ipv6.h> #include <net/tcp.h> @@ -143,6 +144,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; + u32 tsoff = 0; if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; @@ -162,6 +164,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, 0, NULL); + if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { + tsoff = secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32); + tcp_opt.rcv_tsecr -= tsoff; + } + if (!cookie_timestamp_decode(&tcp_opt)) goto out; @@ -242,7 +250,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); - ret = tcp_get_cookie_sock(sk, skb, req, dst); + ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff); out: return ret; out_free: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8e42e8f54b70..df5a9ff71f3f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -101,12 +101,18 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static u32 tcp_v6_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff) +static u32 tcp_v6_init_seq(const struct sk_buff *skb) { - return secure_tcpv6_seq_and_tsoff(ipv6_hdr(skb)->daddr.s6_addr32, - ipv6_hdr(skb)->saddr.s6_addr32, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source, tsoff); + return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); +} + +static u32 tcp_v6_init_ts_off(const struct sk_buff *skb) +{ + return secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, @@ -122,7 +128,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct flowi6 fl6; struct dst_entry *dst; int addr_type; - u32 seq; int err; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; @@ -282,13 +287,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk_set_txhash(sk); if (likely(!tp->repair)) { - seq = secure_tcpv6_seq_and_tsoff(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport, - &tp->tsoffset); if (!tp->write_seq) - tp->write_seq = seq; + tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_sport, + inet->inet_dport); + tp->tsoffset = secure_tcpv6_ts_off(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32); } if (tcp_fastopen_defer_connect(sk, &err)) @@ -749,7 +754,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .cookie_init_seq = cookie_v6_init_sequence, #endif .route_req = tcp_v6_route_req, - .init_seq_tsoff = tcp_v6_init_seq_and_tsoff, + .init_seq = tcp_v6_init_seq, + .init_ts_off = tcp_v6_init_ts_off, .send_synack = tcp_v6_send_synack, }; @@ -1056,6 +1062,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif + newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; @@ -1125,6 +1132,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * First: no IPv4 options. */ newinet->inet_opt = NULL; + newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 6db09fa18269..364d4e137649 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -66,6 +66,8 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, 2 + (IEEE80211_MAX_SUPP_RATES - 8) + 2 + sizeof(struct ieee80211_ht_cap) + 2 + sizeof(struct ieee80211_ht_operation) + + 2 + sizeof(struct ieee80211_vht_cap) + + 2 + sizeof(struct ieee80211_vht_operation) + ifibss->ie_len; presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL); if (!presp) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 89dff563b1ec..0ea9712bd99e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4382,6 +4382,10 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data)) return -EINVAL; + /* If a reconfig is happening, bail out */ + if (local->in_reconfig) + return -EBUSY; + if (assoc) { rcu_read_lock(); have_sta = sta_info_get(sdata, cbss->bssid); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 088e2b459d0f..257ec66009da 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2005,10 +2005,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) unsigned index; if (size) { - labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); - if (!labels) - labels = vzalloc(size); - + labels = kvzalloc(size, GFP_KERNEL); if (!labels) goto nolabels; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3a60efa7799b..7f6100ca63be 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -174,6 +174,10 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) #endif if (h != NULL && !try_module_get(h->me)) h = NULL; + if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) { + module_put(h->me); + h = NULL; + } rcu_read_unlock(); @@ -181,6 +185,13 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); +void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) +{ + refcount_dec(&helper->refcnt); + module_put(helper->me); +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); + struct nf_conn_help * nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp) @@ -417,6 +428,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) } } } + refcount_set(&me->refcnt, 1); hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; out: diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index dcf561b5c97a..fa752626029e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1007,9 +1007,8 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { static int ctnetlink_parse_tuple(const struct nlattr * const cda[], - struct nf_conntrack_tuple *tuple, - enum ctattr_type type, u_int8_t l3num, - struct nf_conntrack_zone *zone) + struct nf_conntrack_tuple *tuple, u32 type, + u_int8_t l3num, struct nf_conntrack_zone *zone) { struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; @@ -2447,7 +2446,7 @@ static struct nfnl_ct_hook ctnetlink_glue_hook = { static int ctnetlink_exp_dump_tuple(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, - enum ctattr_expect type) + u32 type) { struct nlattr *nest_parms; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index b48d6b5aae8a..ef0be325a0c6 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -409,6 +409,10 @@ nf_nat_setup_info(struct nf_conn *ct, { struct nf_conntrack_tuple curr_tuple, new_tuple; + /* Can't setup nat info for confirmed ct. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || maniptype == NF_NAT_MANIP_DST); BUG_ON(nf_nat_initialized(ct, maniptype)); diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 950bf6eadc65..be678a323598 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -686,6 +686,7 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple_set = true; } + ret = -ENOENT; list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { cur = &nlcth->helper; j++; @@ -699,16 +700,20 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple.dst.protonum != cur->tuple.dst.protonum)) continue; - found = true; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); + if (refcount_dec_if_one(&cur->refcnt)) { + found = true; + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); - list_del(&nlcth->list); - kfree(nlcth); + list_del(&nlcth->list); + kfree(nlcth); + } else { + ret = -EBUSY; + } } /* Make sure we return success if we flush and there is no helpers */ - return (found || j == 0) ? 0 : -ENOENT; + return (found || j == 0) ? 0 : ret; } static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a34ceb38fc55..1678e9e75e8e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -826,9 +826,9 @@ static void nft_ct_helper_obj_destroy(struct nft_object *obj) struct nft_ct_helper_obj *priv = nft_obj_data(obj); if (priv->helper4) - module_put(priv->helper4->me); + nf_conntrack_helper_put(priv->helper4); if (priv->helper6) - module_put(priv->helper6->me); + nf_conntrack_helper_put(priv->helper6); } static void nft_ct_helper_obj_eval(struct nft_object *obj, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index f134d384852f..8876b7da6884 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -763,17 +763,8 @@ EXPORT_SYMBOL(xt_check_entry_offsets); */ unsigned int *xt_alloc_entry_offsets(unsigned int size) { - unsigned int *off; + return kvmalloc_array(size, sizeof(unsigned int), GFP_KERNEL | __GFP_ZERO); - off = kcalloc(size, sizeof(unsigned int), GFP_KERNEL | __GFP_NOWARN); - - if (off) - return off; - - if (size < (SIZE_MAX / sizeof(unsigned int))) - off = vmalloc(size * sizeof(unsigned int)); - - return off; } EXPORT_SYMBOL(xt_alloc_entry_offsets); @@ -1007,8 +998,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (!info) { - info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | - __GFP_NORETRY | __GFP_HIGHMEM, + info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, PAGE_KERNEL); if (!info) return NULL; @@ -1116,7 +1106,7 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) - i->jumpstack = vzalloc(size); + i->jumpstack = kvzalloc(size, GFP_KERNEL); else i->jumpstack = kzalloc(size, GFP_KERNEL); if (i->jumpstack == NULL) @@ -1138,12 +1128,8 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) */ size = sizeof(void *) * i->stacksize * 2u; for_each_possible_cpu(cpu) { - if (size > PAGE_SIZE) - i->jumpstack[cpu] = vmalloc_node(size, - cpu_to_node(cpu)); - else - i->jumpstack[cpu] = kmalloc_node(size, - GFP_KERNEL, cpu_to_node(cpu)); + i->jumpstack[cpu] = kvmalloc_node(size, GFP_KERNEL, + cpu_to_node(cpu)); if (i->jumpstack[cpu] == NULL) /* * Freeing will be done later on by the callers. The diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 19247a17e511..c502419d6306 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -31,146 +31,76 @@ MODULE_ALIAS("ip6t_AUDIT"); MODULE_ALIAS("ebt_AUDIT"); MODULE_ALIAS("arpt_AUDIT"); -static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb, - unsigned int proto, unsigned int offset) -{ - switch (proto) { - case IPPROTO_TCP: - case IPPROTO_UDP: - case IPPROTO_UDPLITE: { - const __be16 *pptr; - __be16 _ports[2]; - - pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports); - if (pptr == NULL) { - audit_log_format(ab, " truncated=1"); - return; - } - - audit_log_format(ab, " sport=%hu dport=%hu", - ntohs(pptr[0]), ntohs(pptr[1])); - } - break; - - case IPPROTO_ICMP: - case IPPROTO_ICMPV6: { - const u8 *iptr; - u8 _ih[2]; - - iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih); - if (iptr == NULL) { - audit_log_format(ab, " truncated=1"); - return; - } - - audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu", - iptr[0], iptr[1]); - - } - break; - } -} - -static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb) +static bool audit_ip4(struct audit_buffer *ab, struct sk_buff *skb) { struct iphdr _iph; const struct iphdr *ih; - ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); - if (!ih) { - audit_log_format(ab, " truncated=1"); - return; - } + ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_iph), &_iph); + if (!ih) + return false; - audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu", - &ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol); + audit_log_format(ab, " saddr=%pI4 daddr=%pI4 proto=%hhu", + &ih->saddr, &ih->daddr, ih->protocol); - if (ntohs(ih->frag_off) & IP_OFFSET) { - audit_log_format(ab, " frag=1"); - return; - } - - audit_proto(ab, skb, ih->protocol, ih->ihl * 4); + return true; } -static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) +static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) { struct ipv6hdr _ip6h; const struct ipv6hdr *ih; u8 nexthdr; __be16 frag_off; - int offset; ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h); - if (!ih) { - audit_log_format(ab, " truncated=1"); - return; - } + if (!ih) + return false; nexthdr = ih->nexthdr; - offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), - &nexthdr, &frag_off); + ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), &nexthdr, &frag_off); audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu", &ih->saddr, &ih->daddr, nexthdr); - if (offset) - audit_proto(ab, skb, nexthdr, offset); + return true; } static unsigned int audit_tg(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_audit_info *info = par->targinfo; struct audit_buffer *ab; + int fam = -1; if (audit_enabled == 0) goto errout; - ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT); if (ab == NULL) goto errout; - audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s", - info->type, xt_hooknum(par), skb->len, - xt_in(par) ? xt_inname(par) : "?", - xt_out(par) ? xt_outname(par) : "?"); - - if (skb->mark) - audit_log_format(ab, " mark=%#x", skb->mark); - - if (skb->dev && skb->dev->type == ARPHRD_ETHER) { - audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, - ntohs(eth_hdr(skb)->h_proto)); - - if (xt_family(par) == NFPROTO_BRIDGE) { - switch (eth_hdr(skb)->h_proto) { - case htons(ETH_P_IP): - audit_ip4(ab, skb); - break; - - case htons(ETH_P_IPV6): - audit_ip6(ab, skb); - break; - } - } - } + audit_log_format(ab, "mark=%#x", skb->mark); switch (xt_family(par)) { + case NFPROTO_BRIDGE: + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1; + break; + case htons(ETH_P_IPV6): + fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1; + break; + } + break; case NFPROTO_IPV4: - audit_ip4(ab, skb); + fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1; break; - case NFPROTO_IPV6: - audit_ip6(ab, skb); + fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1; break; } -#ifdef CONFIG_NETWORK_SECMARK - if (skb->secmark) - audit_log_secctx(ab, skb->secmark); -#endif + if (fam == -1) + audit_log_format(ab, " saddr=? daddr=? proto=-1"); audit_log_end(ab); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index bb7ad82dcd56..623ef37de886 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -96,7 +96,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); if (help == NULL) { - module_put(helper->me); + nf_conntrack_helper_put(helper); return -ENOMEM; } @@ -263,7 +263,7 @@ out: err4: help = nfct_help(ct); if (help) - module_put(help->helper->me); + nf_conntrack_helper_put(help->helper); err3: nf_ct_tmpl_free(ct); err2: @@ -346,7 +346,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, if (ct) { help = nfct_help(ct); if (help) - module_put(help->helper->me); + nf_conntrack_helper_put(help->helper); nf_ct_netns_put(par->net, par->family); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 37d581a31cff..3f6c4fa78bdb 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -388,10 +388,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, } sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size; - if (sz <= PAGE_SIZE) - t = kzalloc(sz, GFP_KERNEL); - else - t = vzalloc(sz); + t = kvzalloc(sz, GFP_KERNEL); if (t == NULL) { ret = -ENOMEM; goto out; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index bf602e33c40a..08679ebb3068 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1123,7 +1123,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL); if (!help) { - module_put(helper->me); + nf_conntrack_helper_put(helper); return -ENOMEM; } @@ -1584,7 +1584,7 @@ void ovs_ct_free_action(const struct nlattr *a) static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) { if (ct_info->helper) - module_put(ct_info->helper->me); + nf_conntrack_helper_put(ct_info->helper); if (ct_info->ct) nf_ct_tmpl_free(ct_info->ct); } diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 2efb36c08f2a..dee469fed967 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -203,8 +203,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, *arg = (unsigned long) head; rcu_assign_pointer(tp->root, new); - if (head) - call_rcu(&head->rcu, mall_destroy_rcu); + call_rcu(&head->rcu, mall_destroy_rcu); return 0; err_replace_hw_filter: diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bbe57d57b67f..e88342fde1bc 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1831,6 +1831,12 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (!qdisc_dev(root)) return 0; + if (tcm->tcm_parent) { + q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); + if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + return -1; + return 0; + } hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index d00f4c7c2f3a..b30a2c70bd48 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -376,10 +376,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) if (mask != q->tab_mask) { struct sk_buff **ntab; - ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), - GFP_KERNEL | __GFP_NOWARN); - if (!ntab) - ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *)); + ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *), GFP_KERNEL | __GFP_ZERO); if (!ntab) return -ENOMEM; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index da4f67bda0ee..b488721a0059 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -624,16 +624,6 @@ static void fq_rehash(struct fq_sched_data *q, q->stat_gc_flows += fcnt; } -static void *fq_alloc_node(size_t sz, int node) -{ - void *ptr; - - ptr = kmalloc_node(sz, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN, node); - if (!ptr) - ptr = vmalloc_node(sz, node); - return ptr; -} - static void fq_free(void *addr) { kvfree(addr); @@ -650,7 +640,7 @@ static int fq_resize(struct Qdisc *sch, u32 log) return 0; /* If XPS was setup, we can allocate memory on right NUMA node */ - array = fq_alloc_node(sizeof(struct rb_root) << log, + array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_REPEAT, netdev_queue_numa_node_read(sch->dev_queue)); if (!array) return -ENOMEM; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 18bbb5476c83..9201abce928c 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -446,27 +446,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) return 0; } -static void *fq_codel_zalloc(size_t sz) -{ - void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN); - - if (!ptr) - ptr = vzalloc(sz); - return ptr; -} - -static void fq_codel_free(void *addr) -{ - kvfree(addr); -} - static void fq_codel_destroy(struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); tcf_destroy_chain(&q->filter_list); - fq_codel_free(q->backlogs); - fq_codel_free(q->flows); + kvfree(q->backlogs); + kvfree(q->flows); } static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) @@ -493,13 +479,13 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) } if (!q->flows) { - q->flows = fq_codel_zalloc(q->flows_cnt * - sizeof(struct fq_codel_flow)); + q->flows = kvzalloc(q->flows_cnt * + sizeof(struct fq_codel_flow), GFP_KERNEL); if (!q->flows) return -ENOMEM; - q->backlogs = fq_codel_zalloc(q->flows_cnt * sizeof(u32)); + q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL); if (!q->backlogs) { - fq_codel_free(q->flows); + kvfree(q->flows); return -ENOMEM; } for (i = 0; i < q->flows_cnt; i++) { diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index c19d346e6c5a..51d3ba682af9 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -467,29 +467,14 @@ static void hhf_reset(struct Qdisc *sch) rtnl_kfree_skbs(skb, skb); } -static void *hhf_zalloc(size_t sz) -{ - void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN); - - if (!ptr) - ptr = vzalloc(sz); - - return ptr; -} - -static void hhf_free(void *addr) -{ - kvfree(addr); -} - static void hhf_destroy(struct Qdisc *sch) { int i; struct hhf_sched_data *q = qdisc_priv(sch); for (i = 0; i < HHF_ARRAYS_CNT; i++) { - hhf_free(q->hhf_arrays[i]); - hhf_free(q->hhf_valid_bits[i]); + kvfree(q->hhf_arrays[i]); + kvfree(q->hhf_valid_bits[i]); } for (i = 0; i < HH_FLOWS_CNT; i++) { @@ -503,7 +488,7 @@ static void hhf_destroy(struct Qdisc *sch) kfree(flow); } } - hhf_free(q->hh_flows); + kvfree(q->hh_flows); } static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = { @@ -609,8 +594,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) if (!q->hh_flows) { /* Initialize heavy-hitter flow table. */ - q->hh_flows = hhf_zalloc(HH_FLOWS_CNT * - sizeof(struct list_head)); + q->hh_flows = kvzalloc(HH_FLOWS_CNT * + sizeof(struct list_head), GFP_KERNEL); if (!q->hh_flows) return -ENOMEM; for (i = 0; i < HH_FLOWS_CNT; i++) @@ -624,8 +609,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) /* Initialize heavy-hitter filter arrays. */ for (i = 0; i < HHF_ARRAYS_CNT; i++) { - q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN * - sizeof(u32)); + q->hhf_arrays[i] = kvzalloc(HHF_ARRAYS_LEN * + sizeof(u32), GFP_KERNEL); if (!q->hhf_arrays[i]) { /* Note: hhf_destroy() will be called * by our caller. @@ -637,8 +622,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) /* Initialize valid bits of heavy-hitter filter arrays. */ for (i = 0; i < HHF_ARRAYS_CNT; i++) { - q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN / - BITS_PER_BYTE); + q->hhf_valid_bits[i] = kvzalloc(HHF_ARRAYS_LEN / + BITS_PER_BYTE, GFP_KERNEL); if (!q->hhf_valid_bits[i]) { /* Note: hhf_destroy() will be called * by our caller. diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index f0ce4780f395..1b3dd6190e93 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -702,15 +702,11 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) spinlock_t *root_lock; struct disttable *d; int i; - size_t s; if (n > NETEM_DIST_MAX) return -EINVAL; - s = sizeof(struct disttable) + n * sizeof(s16); - d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN); - if (!d) - d = vmalloc(s); + d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL); if (!d) return -ENOMEM; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index b00e02c139de..332d94be6e1c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -685,11 +685,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) static void *sfq_alloc(size_t sz) { - void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN); - - if (!ptr) - ptr = vmalloc(sz); - return ptr; + return kvmalloc(sz, GFP_KERNEL); } static void sfq_free(void *addr) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 961ee59f696a..142b70e959af 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -240,12 +240,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct sctp_bind_addr *bp; struct ipv6_pinfo *np = inet6_sk(sk); struct sctp_sockaddr_entry *laddr; - union sctp_addr *baddr = NULL; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; struct in6_addr *final_p, final; __u8 matchlen = 0; - __u8 bmatchlen; sctp_scope_t scope; memset(fl6, 0, sizeof(struct flowi6)); @@ -312,23 +310,37 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, */ rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid) + struct dst_entry *bdst; + __u8 bmatchlen; + + if (!laddr->valid || + laddr->state != SCTP_ADDR_SRC || + laddr->a.sa.sa_family != AF_INET6 || + scope > sctp_scope(&laddr->a)) continue; - if ((laddr->state == SCTP_ADDR_SRC) && - (laddr->a.sa.sa_family == AF_INET6) && - (scope <= sctp_scope(&laddr->a))) { - bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); - if (!baddr || (matchlen < bmatchlen)) { - baddr = &laddr->a; - matchlen = bmatchlen; - } - } - } - if (baddr) { - fl6->saddr = baddr->v6.sin6_addr; - fl6->fl6_sport = baddr->v6.sin6_port; + + fl6->saddr = laddr->a.v6.sin6_addr; + fl6->fl6_sport = laddr->a.v6.sin6_port; final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); - dst = ip6_dst_lookup_flow(sk, fl6, final_p); + bdst = ip6_dst_lookup_flow(sk, fl6, final_p); + + if (!IS_ERR(bdst) && + ipv6_chk_addr(dev_net(bdst->dev), + &laddr->a.v6.sin6_addr, bdst->dev, 1)) { + if (!IS_ERR_OR_NULL(dst)) + dst_release(dst); + dst = bdst; + break; + } + + bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); + if (matchlen > bmatchlen) + continue; + + if (!IS_ERR_OR_NULL(dst)) + dst_release(dst); + dst = bdst; + matchlen = bmatchlen; } rcu_read_unlock(); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 16b7c801f8b6..cb69ab977cd7 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -80,12 +80,11 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk) memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.qp_state = IB_QPS_RTR; qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu); - qp_attr.ah_attr.port_num = lnk->ibport; - qp_attr.ah_attr.ah_flags = IB_AH_GRH; - qp_attr.ah_attr.grh.hop_limit = 1; - memcpy(&qp_attr.ah_attr.grh.dgid, lnk->peer_gid, - sizeof(lnk->peer_gid)); - memcpy(&qp_attr.ah_attr.dmac, lnk->peer_mac, + qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; + rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport); + rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, 0, 1, 0); + rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid); + memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, sizeof(lnk->peer_mac)); qp_attr.dest_qp_num = lnk->peer_qpn; qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */ diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 919981324171..9aed6fe1bf1a 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -106,7 +106,6 @@ __init int net_sysctl_init(void) ret = register_pernet_subsys(&sysctl_pernet_ops); if (ret) goto out1; - register_sysctl_root(&net_sysctl_root); out: return ret; out1: diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 0d4f2f455a7c..1b92b72e812f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -362,25 +362,25 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout) return 0; } -#define tipc_wait_for_cond(sock_, timeout_, condition_) \ -({ \ - int rc_ = 0; \ - int done_ = 0; \ - \ - while (!(condition_) && !done_) { \ - struct sock *sk_ = sock->sk; \ - DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ - \ - rc_ = tipc_sk_sock_err(sock_, timeout_); \ - if (rc_) \ - break; \ - prepare_to_wait(sk_sleep(sk_), &wait_, \ - TASK_INTERRUPTIBLE); \ - done_ = sk_wait_event(sk_, timeout_, \ - (condition_), &wait_); \ - remove_wait_queue(sk_sleep(sk_), &wait_); \ - } \ - rc_; \ +#define tipc_wait_for_cond(sock_, timeo_, condition_) \ +({ \ + struct sock *sk_; \ + int rc_; \ + \ + while ((rc_ = !(condition_))) { \ + DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ + sk_ = (sock_)->sk; \ + rc_ = tipc_sk_sock_err((sock_), timeo_); \ + if (rc_) \ + break; \ + prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \ + release_sock(sk_); \ + *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \ + sched_annotate_sleep(); \ + lock_sock(sk_); \ + remove_wait_queue(sk_sleep(sk_), &wait_); \ + } \ + rc_; \ }) /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 570fc95dc507..c3bc9da30cff 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2764,8 +2764,8 @@ static int nl80211_parse_mon_options(struct cfg80211_registered_device *rdev, nla_data(info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]); /* bits 0 and 63 are reserved and must be zero */ - if ((mumimo_groups[0] & BIT(7)) || - (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(0))) + if ((mumimo_groups[0] & BIT(0)) || + (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(7))) return -EINVAL; params->vht_mumimo_groups = mumimo_groups; |