summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/protocol.c6
-rw-r--r--net/9p/trans_fd.c3
-rw-r--r--net/9p/trans_rdma.c52
-rw-r--r--net/9p/trans_virtio.c5
-rw-r--r--net/atm/mpoa_proc.c2
-rw-r--r--net/bluetooth/hci_core.c148
-rw-r--r--net/bluetooth/hci_event.c107
-rw-r--r--net/bluetooth/hidp/core.c3
-rw-r--r--net/bluetooth/mgmt.c154
-rw-r--r--net/bridge/br_forward.c13
-rw-r--r--net/bridge/br_input.c16
-rw-r--r--net/bridge/br_multicast.c3
-rw-r--r--net/bridge/br_netfilter.c198
-rw-r--r--net/bridge/br_private.h6
-rw-r--r--net/bridge/br_stp_bpdu.c5
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c26
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c8
-rw-r--r--net/ceph/ceph_common.c37
-rw-r--r--net/ceph/crush/crush.c14
-rw-r--r--net/ceph/crush/crush_ln_table.h166
-rw-r--r--net/ceph/crush/mapper.c118
-rw-r--r--net/ceph/debugfs.c24
-rw-r--r--net/ceph/messenger.c34
-rw-r--r--net/ceph/osdmap.c25
-rw-r--r--net/core/dev.c43
-rw-r--r--net/core/filter.c41
-rw-r--r--net/core/net_namespace.c104
-rw-r--r--net/core/rtnetlink.c54
-rw-r--r--net/core/skbuff.c11
-rw-r--r--net/core/sock.c3
-rw-r--r--net/dccp/minisocks.c19
-rw-r--r--net/decnet/dn_neigh.c35
-rw-r--r--net/decnet/dn_nsp_in.c5
-rw-r--r--net/decnet/dn_route.c26
-rw-r--r--net/dsa/dsa.c6
-rw-r--r--net/ipv4/arp.c10
-rw-r--r--net/ipv4/fou.c233
-rw-r--r--net/ipv4/geneve.c10
-rw-r--r--net/ipv4/inet_diag.c32
-rw-r--r--net/ipv4/inet_hashtables.c4
-rw-r--r--net/ipv4/inet_timewait_sock.c270
-rw-r--r--net/ipv4/ip_forward.c11
-rw-r--r--net/ipv4/ip_input.c10
-rw-r--r--net/ipv4/ip_output.c48
-rw-r--r--net/ipv4/ip_tunnel.c2
-rw-r--r--net/ipv4/ip_tunnel_core.c3
-rw-r--r--net/ipv4/ipmr.c7
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c9
-rw-r--r--net/ipv4/netfilter/nft_redir_ipv4.c11
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c6
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--net/ipv4/tcp_dctcp.c5
-rw-r--r--net/ipv4/tcp_fastopen.c1
-rw-r--r--net/ipv4/tcp_illinois.c6
-rw-r--r--net/ipv4/tcp_input.c83
-rw-r--r--net/ipv4/tcp_ipv4.c8
-rw-r--r--net/ipv4/tcp_metrics.c13
-rw-r--r--net/ipv4/tcp_minisocks.c49
-rw-r--r--net/ipv4/tcp_output.c42
-rw-r--r--net/ipv4/tcp_timer.c2
-rw-r--r--net/ipv4/tcp_vegas.c5
-rw-r--r--net/ipv4/tcp_vegas.h2
-rw-r--r--net/ipv4/tcp_westwood.c6
-rw-r--r--net/ipv4/udp.c5
-rw-r--r--net/ipv4/udp_tunnel.c4
-rw-r--r--net/ipv4/xfrm4_input.c5
-rw-r--r--net/ipv4/xfrm4_output.c12
-rw-r--r--net/ipv6/inet6_hashtables.c2
-rw-r--r--net/ipv6/ip6_gre.c2
-rw-r--r--net/ipv6/ip6_input.c11
-rw-r--r--net/ipv6/ip6_output.c33
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/ip6_udp_tunnel.c5
-rw-r--r--net/ipv6/ip6_vti.c3
-rw-r--r--net/ipv6/ip6mr.c7
-rw-r--r--net/ipv6/mcast.c9
-rw-r--r--net/ipv6/ndisc.c5
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c2
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c7
-rw-r--r--net/ipv6/netfilter/nft_redir_ipv6.c11
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c6
-rw-r--r--net/ipv6/output_core.c23
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/ipv6/udp.c3
-rw-r--r--net/ipv6/xfrm6_input.c3
-rw-r--r--net/ipv6/xfrm6_output.c15
-rw-r--r--net/mac80211/agg-tx.c44
-rw-r--r--net/mac80211/driver-ops.h12
-rw-r--r--net/mac80211/ieee80211_i.h28
-rw-r--r--net/mac80211/iface.c23
-rw-r--r--net/mac80211/main.c12
-rw-r--r--net/mac80211/mlme.c14
-rw-r--r--net/mac80211/rc80211_minstrel.c125
-rw-r--r--net/mac80211/rc80211_minstrel.h49
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c125
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c217
-rw-r--r--net/mac80211/rc80211_minstrel_ht.h5
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c216
-rw-r--r--net/mac80211/rx.c22
-rw-r--r--net/mac80211/sta_info.c186
-rw-r--r--net/mac80211/sta_info.h40
-rw-r--r--net/mac80211/status.c8
-rw-r--r--net/mac80211/trace.c1
-rw-r--r--net/mac80211/trace.h49
-rw-r--r--net/mac80211/trace_msg.h53
-rw-r--r--net/mac80211/tx.c115
-rw-r--r--net/mac80211/util.c62
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c32
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c8
-rw-r--r--net/netfilter/nf_log_common.c5
-rw-r--r--net/netfilter/nf_queue.c24
-rw-r--r--net/netfilter/nf_tables_api.c447
-rw-r--r--net/netfilter/nf_tables_core.c48
-rw-r--r--net/netfilter/nfnetlink_log.c23
-rw-r--r--net/netfilter/nfnetlink_queue_core.c37
-rw-r--r--net/netfilter/nft_bitwise.c37
-rw-r--r--net/netfilter/nft_byteorder.c40
-rw-r--r--net/netfilter/nft_cmp.c44
-rw-r--r--net/netfilter/nft_compat.c32
-rw-r--r--net/netfilter/nft_counter.c3
-rw-r--r--net/netfilter/nft_ct.c118
-rw-r--r--net/netfilter/nft_dynset.c265
-rw-r--r--net/netfilter/nft_expr_template.c94
-rw-r--r--net/netfilter/nft_exthdr.c23
-rw-r--r--net/netfilter/nft_hash.c128
-rw-r--r--net/netfilter/nft_immediate.c18
-rw-r--r--net/netfilter/nft_limit.c5
-rw-r--r--net/netfilter/nft_log.c2
-rw-r--r--net/netfilter/nft_lookup.c33
-rw-r--r--net/netfilter/nft_meta.c112
-rw-r--r--net/netfilter/nft_nat.c71
-rw-r--r--net/netfilter/nft_payload.c24
-rw-r--r--net/netfilter/nft_queue.c4
-rw-r--r--net/netfilter/nft_rbtree.c15
-rw-r--r--net/netfilter/nft_redir.c19
-rw-r--r--net/netfilter/nft_reject_inet.c5
-rw-r--r--net/netfilter/xt_TPROXY.c4
-rw-r--r--net/netfilter/xt_cgroup.c2
-rw-r--r--net/netfilter/xt_physdev.c34
-rw-r--r--net/netfilter/xt_socket.c95
-rw-r--r--net/nfc/nci/core.c11
-rw-r--r--net/nfc/netlink.c2
-rw-r--r--net/openvswitch/flow.c4
-rw-r--r--net/openvswitch/vport-vxlan.c5
-rw-r--r--net/rds/connection.c3
-rw-r--r--net/rds/rds.h1
-rw-r--r--net/rds/send.c33
-rw-r--r--net/sched/act_bpf.c3
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/cls_bpf.c3
-rw-r--r--net/sched/sch_ingress.c9
-rw-r--r--net/sched/sch_netem.c3
-rw-r--r--net/sunrpc/Kconfig2
-rw-r--r--net/sunrpc/cache.c8
-rw-r--r--net/sunrpc/xprtrdma/verbs.c4
-rw-r--r--net/tipc/udp_media.c6
-rw-r--r--net/wireless/Kconfig2
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/wireless/reg.c34
-rw-r--r--net/wireless/reg.h9
-rw-r--r--net/wireless/sme.c74
-rw-r--r--net/wireless/util.c41
-rw-r--r--net/xfrm/xfrm_input.c10
-rw-r--r--net/xfrm/xfrm_output.c16
-rw-r--r--net/xfrm/xfrm_user.c5
172 files changed, 4179 insertions, 1993 deletions
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index e9d0f0c1a048..16d287565987 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -275,7 +275,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
}
break;
case 'R':{
- int16_t *nwqid = va_arg(ap, int16_t *);
+ uint16_t *nwqid = va_arg(ap, uint16_t *);
struct p9_qid **wqids =
va_arg(ap, struct p9_qid **);
@@ -440,7 +440,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
stbuf->n_gid, stbuf->n_muid);
} break;
case 'V':{
- int32_t count = va_arg(ap, int32_t);
+ uint32_t count = va_arg(ap, uint32_t);
struct iov_iter *from =
va_arg(ap, struct iov_iter *);
errcode = p9pdu_writef(pdu, proto_version, "d",
@@ -471,7 +471,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
}
break;
case 'R':{
- int16_t nwqid = va_arg(ap, int);
+ uint16_t nwqid = va_arg(ap, int);
struct p9_qid *wqids =
va_arg(ap, struct p9_qid *);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 3e3d82d8ff70..bced8c074c12 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -734,6 +734,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
opts->port = P9_PORT;
opts->rfd = ~0;
opts->wfd = ~0;
+ opts->privport = 0;
if (!params)
return 0;
@@ -1013,7 +1014,6 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
{
int err;
struct p9_fd_opts opts;
- struct p9_trans_fd *p;
parse_opts(args, &opts);
@@ -1026,7 +1026,6 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
if (err < 0)
return err;
- p = (struct p9_trans_fd *) client->trans;
p9_conn_create(client);
return 0;
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 14ad43b5cf89..3533d2a53ab6 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -139,6 +139,7 @@ struct p9_rdma_opts {
int sq_depth;
int rq_depth;
long timeout;
+ int privport;
};
/*
@@ -146,7 +147,10 @@ struct p9_rdma_opts {
*/
enum {
/* Options that take integer arguments */
- Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, Opt_err,
+ Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout,
+ /* Options that take no argument */
+ Opt_privport,
+ Opt_err,
};
static match_table_t tokens = {
@@ -154,6 +158,7 @@ static match_table_t tokens = {
{Opt_sq_depth, "sq=%u"},
{Opt_rq_depth, "rq=%u"},
{Opt_timeout, "timeout=%u"},
+ {Opt_privport, "privport"},
{Opt_err, NULL},
};
@@ -175,6 +180,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
opts->sq_depth = P9_RDMA_SQ_DEPTH;
opts->rq_depth = P9_RDMA_RQ_DEPTH;
opts->timeout = P9_RDMA_TIMEOUT;
+ opts->privport = 0;
if (!params)
return 0;
@@ -193,13 +199,13 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
if (!*p)
continue;
token = match_token(p, tokens, args);
- if (token == Opt_err)
- continue;
- r = match_int(&args[0], &option);
- if (r < 0) {
- p9_debug(P9_DEBUG_ERROR,
- "integer field, but no integer?\n");
- continue;
+ if ((token != Opt_err) && (token != Opt_privport)) {
+ r = match_int(&args[0], &option);
+ if (r < 0) {
+ p9_debug(P9_DEBUG_ERROR,
+ "integer field, but no integer?\n");
+ continue;
+ }
}
switch (token) {
case Opt_port:
@@ -214,6 +220,9 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
case Opt_timeout:
opts->timeout = option;
break;
+ case Opt_privport:
+ opts->privport = 1;
+ break;
default:
continue;
}
@@ -607,6 +616,23 @@ static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
return 0;
}
+static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
+{
+ struct sockaddr_in cl = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ };
+ int port, err = -EINVAL;
+
+ for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
+ cl.sin_port = htons((ushort)port);
+ err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
+ if (err != -EADDRINUSE)
+ break;
+ }
+ return err;
+}
+
/**
* trans_create_rdma - Transport method for creating atransport instance
* @client: client instance
@@ -642,6 +668,16 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
/* Associate the client with the transport */
client->trans = rdma;
+ /* Bind to a privileged port if we need to */
+ if (opts.privport) {
+ err = p9_rdma_bind_privport(rdma);
+ if (err < 0) {
+ pr_err("%s (%d): problem binding to privport: %d\n",
+ __func__, task_pid_nr(current), -err);
+ goto error;
+ }
+ }
+
/* Resolve the server's address */
rdma->addr.sin_family = AF_INET;
rdma->addr.sin_addr.s_addr = in_aton(addr);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e62bcbbabb5e..9dd49ca67dbc 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -525,7 +525,10 @@ static ssize_t p9_mount_tag_show(struct device *dev,
vdev = dev_to_virtio(dev);
chan = vdev->priv;
- return snprintf(buf, chan->tag_len + 1, "%s", chan->tag);
+ memcpy(buf, chan->tag, chan->tag_len);
+ buf[chan->tag_len] = 0;
+
+ return chan->tag_len + 1;
}
static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL);
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 5bdd300db0f7..2df34eb5d65f 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -272,7 +272,7 @@ static int parse_qos(const char *buff)
qos.rxtp.max_pcr = rx_pcr;
qos.rxtp.max_sdu = rx_sdu;
qos.aal = ATM_AAL5;
- dprintk("parse_qos(): setting qos paramameters to tx=%d,%d rx=%d,%d\n",
+ dprintk("parse_qos(): setting qos parameters to tx=%d,%d rx=%d,%d\n",
qos.txtp.max_pcr, qos.txtp.max_sdu,
qos.rxtp.max_pcr, qos.rxtp.max_sdu);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 46b114c0140b..476709bd068a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3200,7 +3200,7 @@ EXPORT_SYMBOL(hci_register_dev);
/* Unregister HCI device */
void hci_unregister_dev(struct hci_dev *hdev)
{
- int i, id;
+ int id;
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
@@ -3214,9 +3214,6 @@ void hci_unregister_dev(struct hci_dev *hdev)
hci_dev_do_close(hdev);
- for (i = 0; i < NUM_REASSEMBLY; i++)
- kfree_skb(hdev->reassembly[i]);
-
cancel_work_sync(&hdev->power_on);
if (!test_bit(HCI_INIT, &hdev->flags) &&
@@ -3320,149 +3317,6 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb)
}
EXPORT_SYMBOL(hci_recv_frame);
-static int hci_reassembly(struct hci_dev *hdev, int type, void *data,
- int count, __u8 index)
-{
- int len = 0;
- int hlen = 0;
- int remain = count;
- struct sk_buff *skb;
- struct bt_skb_cb *scb;
-
- if ((type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) ||
- index >= NUM_REASSEMBLY)
- return -EILSEQ;
-
- skb = hdev->reassembly[index];
-
- if (!skb) {
- switch (type) {
- case HCI_ACLDATA_PKT:
- len = HCI_MAX_FRAME_SIZE;
- hlen = HCI_ACL_HDR_SIZE;
- break;
- case HCI_EVENT_PKT:
- len = HCI_MAX_EVENT_SIZE;
- hlen = HCI_EVENT_HDR_SIZE;
- break;
- case HCI_SCODATA_PKT:
- len = HCI_MAX_SCO_SIZE;
- hlen = HCI_SCO_HDR_SIZE;
- break;
- }
-
- skb = bt_skb_alloc(len, GFP_ATOMIC);
- if (!skb)
- return -ENOMEM;
-
- scb = (void *) skb->cb;
- scb->expect = hlen;
- scb->pkt_type = type;
-
- hdev->reassembly[index] = skb;
- }
-
- while (count) {
- scb = (void *) skb->cb;
- len = min_t(uint, scb->expect, count);
-
- memcpy(skb_put(skb, len), data, len);
-
- count -= len;
- data += len;
- scb->expect -= len;
- remain = count;
-
- switch (type) {
- case HCI_EVENT_PKT:
- if (skb->len == HCI_EVENT_HDR_SIZE) {
- struct hci_event_hdr *h = hci_event_hdr(skb);
- scb->expect = h->plen;
-
- if (skb_tailroom(skb) < scb->expect) {
- kfree_skb(skb);
- hdev->reassembly[index] = NULL;
- return -ENOMEM;
- }
- }
- break;
-
- case HCI_ACLDATA_PKT:
- if (skb->len == HCI_ACL_HDR_SIZE) {
- struct hci_acl_hdr *h = hci_acl_hdr(skb);
- scb->expect = __le16_to_cpu(h->dlen);
-
- if (skb_tailroom(skb) < scb->expect) {
- kfree_skb(skb);
- hdev->reassembly[index] = NULL;
- return -ENOMEM;
- }
- }
- break;
-
- case HCI_SCODATA_PKT:
- if (skb->len == HCI_SCO_HDR_SIZE) {
- struct hci_sco_hdr *h = hci_sco_hdr(skb);
- scb->expect = h->dlen;
-
- if (skb_tailroom(skb) < scb->expect) {
- kfree_skb(skb);
- hdev->reassembly[index] = NULL;
- return -ENOMEM;
- }
- }
- break;
- }
-
- if (scb->expect == 0) {
- /* Complete frame */
-
- bt_cb(skb)->pkt_type = type;
- hci_recv_frame(hdev, skb);
-
- hdev->reassembly[index] = NULL;
- return remain;
- }
- }
-
- return remain;
-}
-
-#define STREAM_REASSEMBLY 0
-
-int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count)
-{
- int type;
- int rem = 0;
-
- while (count) {
- struct sk_buff *skb = hdev->reassembly[STREAM_REASSEMBLY];
-
- if (!skb) {
- struct { char type; } *pkt;
-
- /* Start of the frame */
- pkt = data;
- type = pkt->type;
-
- data++;
- count--;
- } else
- type = bt_cb(skb)->pkt_type;
-
- rem = hci_reassembly(hdev, type, data, count,
- STREAM_REASSEMBLY);
- if (rem < 0)
- return rem;
-
- data += (count - rem);
- count = rem;
- }
-
- return rem;
-}
-EXPORT_SYMBOL(hci_recv_stream_fragment);
-
/* ---- Interface to upper protocols ---- */
int hci_register_cb(struct hci_cb *cb)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 01031038eb0e..7b61be73650f 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2036,6 +2036,33 @@ unlock:
hci_dev_unlock(hdev);
}
+static void hci_cs_le_read_remote_features(struct hci_dev *hdev, u8 status)
+{
+ struct hci_cp_le_read_remote_features *cp;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ if (!status)
+ return;
+
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_READ_REMOTE_FEATURES);
+ if (!cp)
+ return;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
+ if (conn) {
+ if (conn->state == BT_CONFIG) {
+ hci_connect_cfm(conn, status);
+ hci_conn_drop(conn);
+ }
+ }
+
+ hci_dev_unlock(hdev);
+}
+
static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status)
{
struct hci_cp_le_start_enc *cp;
@@ -3104,6 +3131,10 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
hci_cs_le_create_conn(hdev, ev->status);
break;
+ case HCI_OP_LE_READ_REMOTE_FEATURES:
+ hci_cs_le_read_remote_features(hdev, ev->status);
+ break;
+
case HCI_OP_LE_START_ENC:
hci_cs_le_start_enc(hdev, ev->status);
break;
@@ -4515,7 +4546,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn->sec_level = BT_SECURITY_LOW;
conn->handle = __le16_to_cpu(ev->handle);
- conn->state = BT_CONNECTED;
+ conn->state = BT_CONFIG;
conn->le_conn_interval = le16_to_cpu(ev->interval);
conn->le_conn_latency = le16_to_cpu(ev->latency);
@@ -4524,7 +4555,33 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_debugfs_create_conn(conn);
hci_conn_add_sysfs(conn);
- hci_connect_cfm(conn, ev->status);
+ if (!ev->status) {
+ /* The remote features procedure is defined for master
+ * role only. So only in case of an initiated connection
+ * request the remote features.
+ *
+ * If the local controller supports slave-initiated features
+ * exchange, then requesting the remote features in slave
+ * role is possible. Otherwise just transition into the
+ * connected state without requesting the remote features.
+ */
+ if (conn->out ||
+ (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) {
+ struct hci_cp_le_read_remote_features cp;
+
+ cp.handle = __cpu_to_le16(conn->handle);
+
+ hci_send_cmd(hdev, HCI_OP_LE_READ_REMOTE_FEATURES,
+ sizeof(cp), &cp);
+
+ hci_conn_hold(conn);
+ } else {
+ conn->state = BT_CONNECTED;
+ hci_connect_cfm(conn, ev->status);
+ }
+ } else {
+ hci_connect_cfm(conn, ev->status);
+ }
params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
conn->dst_type);
@@ -4826,6 +4883,48 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_unlock(hdev);
}
+static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_ev_le_remote_feat_complete *ev = (void *)skb->data;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
+ if (conn) {
+ if (!ev->status)
+ memcpy(conn->features[0], ev->features, 8);
+
+ if (conn->state == BT_CONFIG) {
+ __u8 status;
+
+ /* If the local controller supports slave-initiated
+ * features exchange, but the remote controller does
+ * not, then it is possible that the error code 0x1a
+ * for unsupported remote feature gets returned.
+ *
+ * In this specific case, allow the connection to
+ * transition into connected state and mark it as
+ * successful.
+ */
+ if ((hdev->le_features[0] & HCI_LE_SLAVE_FEATURES) &&
+ !conn->out && ev->status == 0x1a)
+ status = 0x00;
+ else
+ status = ev->status;
+
+ conn->state = BT_CONNECTED;
+ hci_connect_cfm(conn, status);
+ hci_conn_drop(conn);
+ }
+ }
+
+ hci_dev_unlock(hdev);
+}
+
static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_le_ltk_req *ev = (void *) skb->data;
@@ -4999,6 +5098,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_le_adv_report_evt(hdev, skb);
break;
+ case HCI_EV_LE_REMOTE_FEAT_COMPLETE:
+ hci_le_remote_feat_complete_evt(hdev, skb);
+ break;
+
case HCI_EV_LE_LTK_REQ:
hci_le_ltk_request_evt(hdev, skb);
break;
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index a05b9dbf14c9..9070dfd6b4ad 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1313,7 +1313,8 @@ int hidp_connection_add(struct hidp_connadd_req *req,
struct socket *ctrl_sock,
struct socket *intr_sock)
{
- u32 valid_flags = 0;
+ u32 valid_flags = BIT(HIDP_VIRTUAL_CABLE_UNPLUG) |
+ BIT(HIDP_BOOT_PROTOCOL_MODE);
struct hidp_session *session;
struct l2cap_conn *conn;
struct l2cap_chan *chan;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 845dfcc43a20..7fd87e7135b5 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -6466,6 +6466,145 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data,
return eir_len;
}
+static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status,
+ u16 opcode, struct sk_buff *skb)
+{
+ const struct mgmt_cp_read_local_oob_ext_data *mgmt_cp;
+ struct mgmt_rp_read_local_oob_ext_data *mgmt_rp;
+ u8 *h192, *r192, *h256, *r256;
+ struct mgmt_pending_cmd *cmd;
+ u16 eir_len;
+ int err;
+
+ BT_DBG("%s status %u", hdev->name, status);
+
+ cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev);
+ if (!cmd)
+ return;
+
+ mgmt_cp = cmd->param;
+
+ if (status) {
+ status = mgmt_status(status);
+ eir_len = 0;
+
+ h192 = NULL;
+ r192 = NULL;
+ h256 = NULL;
+ r256 = NULL;
+ } else if (opcode == HCI_OP_READ_LOCAL_OOB_DATA) {
+ struct hci_rp_read_local_oob_data *rp;
+
+ if (skb->len != sizeof(*rp)) {
+ status = MGMT_STATUS_FAILED;
+ eir_len = 0;
+ } else {
+ status = MGMT_STATUS_SUCCESS;
+ rp = (void *)skb->data;
+
+ eir_len = 5 + 18 + 18;
+ h192 = rp->hash;
+ r192 = rp->rand;
+ h256 = NULL;
+ r256 = NULL;
+ }
+ } else {
+ struct hci_rp_read_local_oob_ext_data *rp;
+
+ if (skb->len != sizeof(*rp)) {
+ status = MGMT_STATUS_FAILED;
+ eir_len = 0;
+ } else {
+ status = MGMT_STATUS_SUCCESS;
+ rp = (void *)skb->data;
+
+ if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) {
+ eir_len = 5 + 18 + 18;
+ h192 = NULL;
+ r192 = NULL;
+ } else {
+ eir_len = 5 + 18 + 18 + 18 + 18;
+ h192 = rp->hash192;
+ r192 = rp->rand192;
+ }
+
+ h256 = rp->hash256;
+ r256 = rp->rand256;
+ }
+ }
+
+ mgmt_rp = kmalloc(sizeof(*mgmt_rp) + eir_len, GFP_KERNEL);
+ if (!mgmt_rp)
+ goto done;
+
+ if (status)
+ goto send_rsp;
+
+ eir_len = eir_append_data(mgmt_rp->eir, 0, EIR_CLASS_OF_DEV,
+ hdev->dev_class, 3);
+
+ if (h192 && r192) {
+ eir_len = eir_append_data(mgmt_rp->eir, eir_len,
+ EIR_SSP_HASH_C192, h192, 16);
+ eir_len = eir_append_data(mgmt_rp->eir, eir_len,
+ EIR_SSP_RAND_R192, r192, 16);
+ }
+
+ if (h256 && r256) {
+ eir_len = eir_append_data(mgmt_rp->eir, eir_len,
+ EIR_SSP_HASH_C256, h256, 16);
+ eir_len = eir_append_data(mgmt_rp->eir, eir_len,
+ EIR_SSP_RAND_R256, r256, 16);
+ }
+
+send_rsp:
+ mgmt_rp->type = mgmt_cp->type;
+ mgmt_rp->eir_len = cpu_to_le16(eir_len);
+
+ err = mgmt_cmd_complete(cmd->sk, hdev->id,
+ MGMT_OP_READ_LOCAL_OOB_EXT_DATA, status,
+ mgmt_rp, sizeof(*mgmt_rp) + eir_len);
+ if (err < 0 || status)
+ goto done;
+
+ hci_sock_set_flag(cmd->sk, HCI_MGMT_OOB_DATA_EVENTS);
+
+ err = mgmt_limited_event(MGMT_EV_LOCAL_OOB_DATA_UPDATED, hdev,
+ mgmt_rp, sizeof(*mgmt_rp) + eir_len,
+ HCI_MGMT_OOB_DATA_EVENTS, cmd->sk);
+done:
+ kfree(mgmt_rp);
+ mgmt_pending_remove(cmd);
+}
+
+static int read_local_ssp_oob_req(struct hci_dev *hdev, struct sock *sk,
+ struct mgmt_cp_read_local_oob_ext_data *cp)
+{
+ struct mgmt_pending_cmd *cmd;
+ struct hci_request req;
+ int err;
+
+ cmd = mgmt_pending_add(sk, MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev,
+ cp, sizeof(*cp));
+ if (!cmd)
+ return -ENOMEM;
+
+ hci_req_init(&req, hdev);
+
+ if (bredr_sc_enabled(hdev))
+ hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_EXT_DATA, 0, NULL);
+ else
+ hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_DATA, 0, NULL);
+
+ err = hci_req_run_skb(&req, read_local_oob_ext_data_complete);
+ if (err < 0) {
+ mgmt_pending_remove(cmd);
+ return err;
+ }
+
+ return 0;
+}
+
static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev,
void *data, u16 data_len)
{
@@ -6517,8 +6656,19 @@ static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev,
eir_len = 0;
switch (cp->type) {
case BIT(BDADDR_BREDR):
- eir_len = eir_append_data(rp->eir, eir_len, EIR_CLASS_OF_DEV,
- hdev->dev_class, 3);
+ if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) {
+ err = read_local_ssp_oob_req(hdev, sk, cp);
+ hci_dev_unlock(hdev);
+ if (!err)
+ goto done;
+
+ status = MGMT_STATUS_FAILED;
+ goto complete;
+ } else {
+ eir_len = eir_append_data(rp->eir, eir_len,
+ EIR_CLASS_OF_DEV,
+ hdev->dev_class, 3);
+ }
break;
case (BIT(BDADDR_LE_PUBLIC) | BIT(BDADDR_LE_RANDOM)):
if (hci_dev_test_flag(hdev, HCI_SC_ENABLED) &&
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 3304a5442331..e97572b5d2cc 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -35,7 +35,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
p->state == BR_STATE_FORWARDING;
}
-int br_dev_queue_push_xmit(struct sk_buff *skb)
+int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb)
{
if (!is_skb_forwardable(skb->dev, skb)) {
kfree_skb(skb);
@@ -49,9 +49,10 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
-int br_forward_finish(struct sk_buff *skb)
+int br_forward_finish(struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
+ return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, sk, skb,
+ NULL, skb->dev,
br_dev_queue_push_xmit);
}
@@ -75,7 +76,8 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
return;
}
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+ NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb,
+ NULL, skb->dev,
br_forward_finish);
}
@@ -96,7 +98,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
skb->dev = to->dev;
skb_forward_csum(skb);
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
+ NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, NULL, skb,
+ indev, skb->dev,
br_forward_finish);
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 052c5ebbc947..f921a5dce22d 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -55,8 +55,9 @@ static int br_pass_frame_up(struct sk_buff *skb)
if (!skb)
return NET_RX_DROP;
- return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
- netif_receive_skb);
+ return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb,
+ indev, NULL,
+ netif_receive_skb_sk);
}
static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
@@ -119,7 +120,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
}
/* note: already called with rcu_read_lock */
-int br_handle_frame_finish(struct sk_buff *skb)
+int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb)
{
const unsigned char *dest = eth_hdr(skb)->h_dest;
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
@@ -207,7 +208,7 @@ drop:
EXPORT_SYMBOL_GPL(br_handle_frame_finish);
/* note: already called with rcu_read_lock */
-static int br_handle_local_finish(struct sk_buff *skb)
+static int br_handle_local_finish(struct sock *sk, struct sk_buff *skb)
{
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
u16 vid = 0;
@@ -277,8 +278,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
}
/* Deliver packet to local host only */
- if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
- NULL, br_handle_local_finish)) {
+ if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb,
+ skb->dev, NULL, br_handle_local_finish)) {
return RX_HANDLER_CONSUMED; /* consumed by filter */
} else {
*pskb = skb;
@@ -302,7 +303,8 @@ forward:
if (ether_addr_equal(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+ NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, NULL, skb,
+ skb->dev, NULL,
br_handle_frame_finish);
break;
default:
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index c465876c7861..4b6722f8f179 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -814,7 +814,8 @@ static void __br_multicast_send_query(struct net_bridge *br,
if (port) {
skb->dev = port->dev;
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+ NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb,
+ NULL, skb->dev,
br_dev_queue_push_xmit);
} else {
br_multicast_select_own_querier(br, ip, skb);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 7527e94dd5dc..ab55e2472beb 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -111,6 +111,24 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
pppoe_proto(skb) == htons(PPP_IPV6) && \
brnf_filter_pppoe_tagged)
+/* largest possible L2 header, see br_nf_dev_queue_xmit() */
+#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
+
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+struct brnf_frag_data {
+ char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
+ u8 encap_size;
+ u8 size;
+};
+
+static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
+#endif
+
+static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb)
+{
+ return skb->nf_bridge;
+}
+
static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
{
struct net_bridge_port *port;
@@ -189,14 +207,6 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
skb->network_header += len;
}
-static inline void nf_bridge_save_header(struct sk_buff *skb)
-{
- int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
-
- skb_copy_from_linear_data_offset(skb, -header_size,
- skb->nf_bridge->data, header_size);
-}
-
/* When handing a packet over to the IP layer
* check whether we have a skb that is in the
* expected format
@@ -252,23 +262,29 @@ drop:
static void nf_bridge_update_protocol(struct sk_buff *skb)
{
- if (skb->nf_bridge->mask & BRNF_8021Q)
+ switch (skb->nf_bridge->orig_proto) {
+ case BRNF_PROTO_8021Q:
skb->protocol = htons(ETH_P_8021Q);
- else if (skb->nf_bridge->mask & BRNF_PPPoE)
+ break;
+ case BRNF_PROTO_PPPOE:
skb->protocol = htons(ETH_P_PPP_SES);
+ break;
+ case BRNF_PROTO_UNCHANGED:
+ break;
+ }
}
/* PF_BRIDGE/PRE_ROUTING *********************************************/
/* Undo the changes made for ip6tables PREROUTING and continue the
* bridge PRE_ROUTING hook. */
-static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
+static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
- if (nf_bridge->mask & BRNF_PKT_TYPE) {
+ if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->mask ^= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = false;
}
nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
@@ -282,7 +298,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+ NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
+ skb->dev, NULL,
br_handle_frame_finish, 1);
return 0;
@@ -293,9 +310,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
* don't, we use the neighbour framework to find out. In both cases, we make
* sure that br_handle_frame_finish() is called afterwards.
*/
-static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
+static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
struct neighbour *neigh;
struct dst_entry *dst;
@@ -305,12 +321,13 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
dst = skb_dst(skb);
neigh = dst_neigh_lookup_skb(dst, skb);
if (neigh) {
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
int ret;
if (neigh->hh.hh_len) {
neigh_hh_bridge(&neigh->hh, skb);
skb->dev = nf_bridge->physindev;
- ret = br_handle_frame_finish(skb);
+ ret = br_handle_frame_finish(sk, skb);
} else {
/* the neighbour function below overwrites the complete
* MAC header, so we save the Ethernet source address and
@@ -318,7 +335,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
*/
skb_copy_from_linear_data_offset(skb,
-(ETH_HLEN-ETH_ALEN),
- skb->nf_bridge->data,
+ nf_bridge->neigh_header,
ETH_HLEN-ETH_ALEN);
/* tell br_dev_xmit to continue with forwarding */
nf_bridge->mask |= BRNF_BRIDGED_DNAT;
@@ -387,11 +404,11 @@ static bool dnat_took_place(const struct sk_buff *skb)
* device, we proceed as if ip_route_input() succeeded. If it differs from the
* logical bridge port or if ip_route_output_key() fails we drop the packet.
*/
-static int br_nf_pre_routing_finish(struct sk_buff *skb)
+static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct iphdr *iph = ip_hdr(skb);
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
int err;
int frag_max_size;
@@ -399,9 +416,9 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
frag_max_size = IPCB(skb)->frag_max_size;
BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
- if (nf_bridge->mask & BRNF_PKT_TYPE) {
+ if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->mask ^= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = false;
}
nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
if (dnat_took_place(skb)) {
@@ -440,7 +457,7 @@ bridged_dnat:
nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(NFPROTO_BRIDGE,
NF_BR_PRE_ROUTING,
- skb, skb->dev, NULL,
+ sk, skb, skb->dev, NULL,
br_nf_pre_routing_finish_bridge,
1);
return 0;
@@ -460,7 +477,8 @@ bridged_dnat:
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+ NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
+ skb->dev, NULL,
br_handle_frame_finish, 1);
return 0;
@@ -483,20 +501,21 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct
/* Some common code for IPv4/IPv6 */
static struct net_device *setup_pre_routing(struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
- nf_bridge->mask |= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = true;
}
nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
nf_bridge->physindev = skb->dev;
skb->dev = brnf_get_logical_dev(skb, skb->dev);
+
if (skb->protocol == htons(ETH_P_8021Q))
- nf_bridge->mask |= BRNF_8021Q;
+ nf_bridge->orig_proto = BRNF_PROTO_8021Q;
else if (skb->protocol == htons(ETH_P_PPP_SES))
- nf_bridge->mask |= BRNF_PPPoE;
+ nf_bridge->orig_proto = BRNF_PROTO_PPPOE;
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
@@ -596,7 +615,8 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
return NF_DROP;
skb->protocol = htons(ETH_P_IPV6);
- NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+ NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb,
+ skb->dev, NULL,
br_nf_pre_routing_finish_ipv6);
return NF_STOLEN;
@@ -651,7 +671,8 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
skb->protocol = htons(ETH_P_IP);
- NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+ NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb,
+ skb->dev, NULL,
br_nf_pre_routing_finish);
return NF_STOLEN;
@@ -674,16 +695,23 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
}
/* PF_BRIDGE/FORWARD *************************************************/
-static int br_nf_forward_finish(struct sk_buff *skb)
+static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct net_device *in;
if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
+ int frag_max_size;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ frag_max_size = IPCB(skb)->frag_max_size;
+ BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
+ }
+
in = nf_bridge->physindev;
- if (nf_bridge->mask & BRNF_PKT_TYPE) {
+ if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->mask ^= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = false;
}
nf_bridge_update_protocol(skb);
} else {
@@ -691,8 +719,8 @@ static int br_nf_forward_finish(struct sk_buff *skb)
}
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in,
- skb->dev, br_forward_finish, 1);
+ NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, sk, skb,
+ in, skb->dev, br_forward_finish, 1);
return 0;
}
@@ -718,6 +746,10 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
if (!nf_bridge_unshare(skb))
return NF_DROP;
+ nf_bridge = nf_bridge_info_get(skb);
+ if (!nf_bridge)
+ return NF_DROP;
+
parent = bridge_parent(state->out);
if (!parent)
return NF_DROP;
@@ -731,14 +763,19 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
nf_bridge_pull_encap_header(skb);
- nf_bridge = skb->nf_bridge;
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
- nf_bridge->mask |= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = true;
}
- if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb))
- return NF_DROP;
+ if (pf == NFPROTO_IPV4) {
+ int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size;
+
+ if (br_parse_ip_options(skb))
+ return NF_DROP;
+
+ IPCB(skb)->frag_max_size = frag_max;
+ }
nf_bridge->physoutdev = skb->dev;
if (pf == NFPROTO_IPV4)
@@ -746,7 +783,8 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
else
skb->protocol = htons(ETH_P_IPV6);
- NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, state->in),
+ NF_HOOK(pf, NF_INET_FORWARD, NULL, skb,
+ brnf_get_logical_dev(skb, state->in),
parent, br_nf_forward_finish);
return NF_STOLEN;
@@ -780,69 +818,74 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
return NF_ACCEPT;
}
*d = state->in;
- NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, state->in,
- state->out, br_nf_forward_finish);
+ NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb,
+ state->in, state->out, br_nf_forward_finish);
return NF_STOLEN;
}
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
-static bool nf_bridge_copy_header(struct sk_buff *skb)
+static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
{
+ struct brnf_frag_data *data;
int err;
- unsigned int header_size;
-
- nf_bridge_update_protocol(skb);
- header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
- err = skb_cow_head(skb, header_size);
- if (err)
- return false;
- skb_copy_to_linear_data_offset(skb, -header_size,
- skb->nf_bridge->data, header_size);
- __skb_push(skb, nf_bridge_encap_header_len(skb));
- return true;
-}
+ data = this_cpu_ptr(&brnf_frag_data_storage);
+ err = skb_cow_head(skb, data->size);
-static int br_nf_push_frag_xmit(struct sk_buff *skb)
-{
- if (!nf_bridge_copy_header(skb)) {
+ if (err) {
kfree_skb(skb);
return 0;
}
- return br_dev_queue_push_xmit(skb);
+ skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
+ __skb_push(skb, data->encap_size);
+
+ return br_dev_queue_push_xmit(sk, skb);
}
-static int br_nf_dev_queue_xmit(struct sk_buff *skb)
+static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
{
int ret;
int frag_max_size;
unsigned int mtu_reserved;
if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP))
- return br_dev_queue_push_xmit(skb);
+ return br_dev_queue_push_xmit(sk, skb);
mtu_reserved = nf_bridge_mtu_reduction(skb);
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
if (skb->len + mtu_reserved > skb->dev->mtu) {
+ struct brnf_frag_data *data;
+
frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
if (br_parse_ip_options(skb))
/* Drop invalid packet */
return NF_DROP;
IPCB(skb)->frag_max_size = frag_max_size;
- ret = ip_fragment(skb, br_nf_push_frag_xmit);
- } else
- ret = br_dev_queue_push_xmit(skb);
+
+ nf_bridge_update_protocol(skb);
+
+ data = this_cpu_ptr(&brnf_frag_data_storage);
+ data->encap_size = nf_bridge_encap_header_len(skb);
+ data->size = ETH_HLEN + data->encap_size;
+
+ skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
+ data->size);
+
+ ret = ip_fragment(sk, skb, br_nf_push_frag_xmit);
+ } else {
+ ret = br_dev_queue_push_xmit(sk, skb);
+ }
return ret;
}
#else
-static int br_nf_dev_queue_xmit(struct sk_buff *skb)
+static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
{
- return br_dev_queue_push_xmit(skb);
+ return br_dev_queue_push_xmit(sk, skb);
}
#endif
@@ -851,7 +894,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct net_device *realoutdev = bridge_parent(skb->dev);
u_int8_t pf;
@@ -877,17 +920,17 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
* about the value of skb->pkt_type. */
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
- nf_bridge->mask |= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = true;
}
nf_bridge_pull_encap_header(skb);
- nf_bridge_save_header(skb);
if (pf == NFPROTO_IPV4)
skb->protocol = htons(ETH_P_IP);
else
skb->protocol = htons(ETH_P_IPV6);
- NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev,
+ NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb,
+ NULL, realoutdev,
br_nf_dev_queue_xmit);
return NF_STOLEN;
@@ -919,15 +962,18 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
*/
static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
skb_pull(skb, ETH_HLEN);
nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
- skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
- skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
+ BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
+
+ skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN),
+ nf_bridge->neigh_header,
+ ETH_HLEN - ETH_ALEN);
skb->dev = nf_bridge->physindev;
- br_handle_frame_finish(skb);
+ br_handle_frame_finish(NULL, skb);
}
static int br_nf_dev_xmit(struct sk_buff *skb)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index b46fa0c5b8ec..6ca0251cb478 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -410,10 +410,10 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
/* br_forward.c */
void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb);
-int br_dev_queue_push_xmit(struct sk_buff *skb);
+int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb);
void br_forward(const struct net_bridge_port *to,
struct sk_buff *skb, struct sk_buff *skb0);
-int br_forward_finish(struct sk_buff *skb);
+int br_forward_finish(struct sock *sk, struct sk_buff *skb);
void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast);
void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
struct sk_buff *skb2, bool unicast);
@@ -431,7 +431,7 @@ void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
void br_manage_promisc(struct net_bridge *br);
/* br_input.c */
-int br_handle_frame_finish(struct sk_buff *skb);
+int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
static inline bool br_rx_handler_check_rcu(const struct net_device *dev)
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index bdb459d21ad8..534fc4cd263e 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -54,8 +54,9 @@ static void br_send_bpdu(struct net_bridge_port *p,
skb_reset_mac_header(skb);
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
- dev_queue_xmit);
+ NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb,
+ NULL, skb->dev,
+ dev_queue_xmit_sk);
}
static inline void br_set_ticks(unsigned char *dest, int j)
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index 4f02109d708f..a21269b83f16 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -19,12 +19,12 @@
#include "../br_private.h"
static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct net_device *in = pkt->in, *out = pkt->out;
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
const struct net_bridge_port *p;
switch (priv->key) {
@@ -40,12 +40,12 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
goto out;
}
- strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data));
+ strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
return;
out:
- return nft_meta_get_eval(expr, data, pkt);
+ return nft_meta_get_eval(expr, regs, pkt);
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
@@ -53,27 +53,21 @@ static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
- int err;
+ unsigned int len;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
switch (priv->key) {
case NFT_META_BRI_IIFNAME:
case NFT_META_BRI_OIFNAME:
+ len = IFNAMSIZ;
break;
default:
return nft_meta_get_init(ctx, expr, tb);
}
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
- if (err < 0)
- return err;
-
- return 0;
+ priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
}
static struct nft_expr_type nft_meta_bridge_type;
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 54a2fdf0f457..858d848564ee 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -257,8 +257,8 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
}
static void nft_reject_bridge_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct nft_reject *priv = nft_expr_priv(expr);
struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
@@ -310,7 +310,7 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
break;
}
out:
- data[NFT_REG_VERDICT].verdict = NF_DROP;
+ regs->verdict.code = NF_DROP;
}
static int nft_reject_bridge_validate(const struct nft_ctx *ctx,
@@ -371,6 +371,8 @@ static int nft_reject_bridge_dump(struct sk_buff *skb,
if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
goto nla_put_failure;
break;
+ default:
+ break;
}
return 0;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index ec565508e904..79e8f71aef5b 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -490,6 +490,43 @@ out:
}
EXPORT_SYMBOL(ceph_parse_options);
+int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
+{
+ struct ceph_options *opt = client->options;
+ size_t pos = m->count;
+
+ if (opt->name)
+ seq_printf(m, "name=%s,", opt->name);
+ if (opt->key)
+ seq_puts(m, "secret=<hidden>,");
+
+ if (opt->flags & CEPH_OPT_FSID)
+ seq_printf(m, "fsid=%pU,", &opt->fsid);
+ if (opt->flags & CEPH_OPT_NOSHARE)
+ seq_puts(m, "noshare,");
+ if (opt->flags & CEPH_OPT_NOCRC)
+ seq_puts(m, "nocrc,");
+ if (opt->flags & CEPH_OPT_NOMSGAUTH)
+ seq_puts(m, "nocephx_require_signatures,");
+ if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
+ seq_puts(m, "notcp_nodelay,");
+
+ if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
+ seq_printf(m, "mount_timeout=%d,", opt->mount_timeout);
+ if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
+ seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl);
+ if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
+ seq_printf(m, "osdkeepalivetimeout=%d,",
+ opt->osd_keepalive_timeout);
+
+ /* drop redundant comma */
+ if (m->count != pos)
+ m->count--;
+
+ return 0;
+}
+EXPORT_SYMBOL(ceph_print_client_options);
+
u64 ceph_client_id(struct ceph_client *client)
{
return client->monc.auth->global_id;
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 16bc199d9a62..9d84ce4ea0df 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -17,6 +17,7 @@ const char *crush_bucket_alg_name(int alg)
case CRUSH_BUCKET_LIST: return "list";
case CRUSH_BUCKET_TREE: return "tree";
case CRUSH_BUCKET_STRAW: return "straw";
+ case CRUSH_BUCKET_STRAW2: return "straw2";
default: return "unknown";
}
}
@@ -40,6 +41,8 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
case CRUSH_BUCKET_STRAW:
return ((struct crush_bucket_straw *)b)->item_weights[p];
+ case CRUSH_BUCKET_STRAW2:
+ return ((struct crush_bucket_straw2 *)b)->item_weights[p];
}
return 0;
}
@@ -77,6 +80,14 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
kfree(b);
}
+void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
+{
+ kfree(b->item_weights);
+ kfree(b->h.perm);
+ kfree(b->h.items);
+ kfree(b);
+}
+
void crush_destroy_bucket(struct crush_bucket *b)
{
switch (b->alg) {
@@ -92,6 +103,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
case CRUSH_BUCKET_STRAW:
crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
break;
+ case CRUSH_BUCKET_STRAW2:
+ crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b);
+ break;
}
}
diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h
new file mode 100644
index 000000000000..6192c7fc958c
--- /dev/null
+++ b/net/ceph/crush/crush_ln_table.h
@@ -0,0 +1,166 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Intel Corporation All Rights Reserved
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#if defined(__linux__)
+#include <linux/types.h>
+#elif defined(__FreeBSD__)
+#include <sys/types.h>
+#endif
+
+#ifndef CEPH_CRUSH_LN_H
+#define CEPH_CRUSH_LN_H
+
+
+// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
+// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
+
+static int64_t __RH_LH_tbl[128*2+2] = {
+ 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll,
+ 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all,
+ 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll,
+ 0x0000f4898d5f85bcll, 0x000010eb389fa29fll, 0x0000f2b9d6480f2cll, 0x000013aa2fdd27f1ll,
+ 0x0000f0f0f0f0f0f1ll, 0x00001663f6fac913ll, 0x0000ef2eb71fc435ll, 0x00001918a16e4633ll,
+ 0x0000ed7303b5cc0fll, 0x00001bc84240adabll, 0x0000ebbdb2a5c162ll, 0x00001e72ec117fa5ll,
+ 0x0000ea0ea0ea0ea1ll, 0x00002118b119b4f3ll, 0x0000e865ac7b7604ll, 0x000023b9a32eaa56ll,
+ 0x0000e6c2b4481cd9ll, 0x00002655d3c4f15cll, 0x0000e525982af70dll, 0x000028ed53f307eell,
+ 0x0000e38e38e38e39ll, 0x00002b803473f7adll, 0x0000e1fc780e1fc8ll, 0x00002e0e85a9de04ll,
+ 0x0000e070381c0e08ll, 0x0000309857a05e07ll, 0x0000dee95c4ca038ll, 0x0000331dba0efce1ll,
+ 0x0000dd67c8a60dd7ll, 0x0000359ebc5b69d9ll, 0x0000dbeb61eed19dll, 0x0000381b6d9bb29bll,
+ 0x0000da740da740dbll, 0x00003a93dc9864b2ll, 0x0000d901b2036407ll, 0x00003d0817ce9cd4ll,
+ 0x0000d79435e50d7all, 0x00003f782d7204d0ll, 0x0000d62b80d62b81ll, 0x000041e42b6ec0c0ll,
+ 0x0000d4c77b03531ell, 0x0000444c1f6b4c2dll, 0x0000d3680d3680d4ll, 0x000046b016ca47c1ll,
+ 0x0000d20d20d20d21ll, 0x000049101eac381cll, 0x0000d0b69fcbd259ll, 0x00004b6c43f1366all,
+ 0x0000cf6474a8819fll, 0x00004dc4933a9337ll, 0x0000ce168a772509ll, 0x0000501918ec6c11ll,
+ 0x0000cccccccccccdll, 0x00005269e12f346ell, 0x0000cb8727c065c4ll, 0x000054b6f7f1325all,
+ 0x0000ca4587e6b750ll, 0x0000570068e7ef5all, 0x0000c907da4e8712ll, 0x000059463f919deell,
+ 0x0000c7ce0c7ce0c8ll, 0x00005b8887367433ll, 0x0000c6980c6980c7ll, 0x00005dc74ae9fbecll,
+ 0x0000c565c87b5f9ell, 0x00006002958c5871ll, 0x0000c4372f855d83ll, 0x0000623a71cb82c8ll,
+ 0x0000c30c30c30c31ll, 0x0000646eea247c5cll, 0x0000c1e4bbd595f7ll, 0x000066a008e4788cll,
+ 0x0000c0c0c0c0c0c1ll, 0x000068cdd829fd81ll, 0x0000bfa02fe80bfbll, 0x00006af861e5fc7dll,
+ 0x0000be82fa0be830ll, 0x00006d1fafdce20all, 0x0000bd6910470767ll, 0x00006f43cba79e40ll,
+ 0x0000bc52640bc527ll, 0x00007164beb4a56dll, 0x0000bb3ee721a54ell, 0x000073829248e961ll,
+ 0x0000ba2e8ba2e8bbll, 0x0000759d4f80cba8ll, 0x0000b92143fa36f6ll, 0x000077b4ff5108d9ll,
+ 0x0000b81702e05c0cll, 0x000079c9aa879d53ll, 0x0000b70fbb5a19bfll, 0x00007bdb59cca388ll,
+ 0x0000b60b60b60b61ll, 0x00007dea15a32c1bll, 0x0000b509e68a9b95ll, 0x00007ff5e66a0ffell,
+ 0x0000b40b40b40b41ll, 0x000081fed45cbccbll, 0x0000b30f63528918ll, 0x00008404e793fb81ll,
+ 0x0000b21642c8590cll, 0x000086082806b1d5ll, 0x0000b11fd3b80b12ll, 0x000088089d8a9e47ll,
+ 0x0000b02c0b02c0b1ll, 0x00008a064fd50f2all, 0x0000af3addc680b0ll, 0x00008c01467b94bbll,
+ 0x0000ae4c415c9883ll, 0x00008df988f4ae80ll, 0x0000ad602b580ad7ll, 0x00008fef1e987409ll,
+ 0x0000ac7691840ac8ll, 0x000091e20ea1393ell, 0x0000ab8f69e2835all, 0x000093d2602c2e5fll,
+ 0x0000aaaaaaaaaaabll, 0x000095c01a39fbd6ll, 0x0000a9c84a47a080ll, 0x000097ab43af59f9ll,
+ 0x0000a8e83f5717c1ll, 0x00009993e355a4e5ll, 0x0000a80a80a80a81ll, 0x00009b79ffdb6c8bll,
+ 0x0000a72f0539782all, 0x00009d5d9fd5010bll, 0x0000a655c4392d7cll, 0x00009f3ec9bcfb80ll,
+ 0x0000a57eb50295fbll, 0x0000a11d83f4c355ll, 0x0000a4a9cf1d9684ll, 0x0000a2f9d4c51039ll,
+ 0x0000a3d70a3d70a4ll, 0x0000a4d3c25e68dcll, 0x0000a3065e3fae7dll, 0x0000a6ab52d99e76ll,
+ 0x0000a237c32b16d0ll, 0x0000a8808c384547ll, 0x0000a16b312ea8fdll, 0x0000aa5374652a1cll,
+ 0x0000a0a0a0a0a0a1ll, 0x0000ac241134c4e9ll, 0x00009fd809fd80a0ll, 0x0000adf26865a8a1ll,
+ 0x00009f1165e72549ll, 0x0000afbe7fa0f04dll, 0x00009e4cad23dd60ll, 0x0000b1885c7aa982ll,
+ 0x00009d89d89d89d9ll, 0x0000b35004723c46ll, 0x00009cc8e160c3fcll, 0x0000b5157cf2d078ll,
+ 0x00009c09c09c09c1ll, 0x0000b6d8cb53b0call, 0x00009b4c6f9ef03bll, 0x0000b899f4d8ab63ll,
+ 0x00009a90e7d95bc7ll, 0x0000ba58feb2703all, 0x000099d722dabde6ll, 0x0000bc15edfeed32ll,
+ 0x0000991f1a515886ll, 0x0000bdd0c7c9a817ll, 0x00009868c809868dll, 0x0000bf89910c1678ll,
+ 0x000097b425ed097cll, 0x0000c1404eadf383ll, 0x000097012e025c05ll, 0x0000c2f5058593d9ll,
+ 0x0000964fda6c0965ll, 0x0000c4a7ba58377cll, 0x000095a02568095bll, 0x0000c65871da59ddll,
+ 0x000094f2094f2095ll, 0x0000c80730b00016ll, 0x0000944580944581ll, 0x0000c9b3fb6d0559ll,
+ 0x0000939a85c4093all, 0x0000cb5ed69565afll, 0x000092f113840498ll, 0x0000cd07c69d8702ll,
+ 0x0000924924924925ll, 0x0000ceaecfea8085ll, 0x000091a2b3c4d5e7ll, 0x0000d053f6d26089ll,
+ 0x000090fdbc090fdcll, 0x0000d1f73f9c70c0ll, 0x0000905a38633e07ll, 0x0000d398ae817906ll,
+ 0x00008fb823ee08fcll, 0x0000d53847ac00a6ll, 0x00008f1779d9fdc4ll, 0x0000d6d60f388e41ll,
+ 0x00008e78356d1409ll, 0x0000d8720935e643ll, 0x00008dda5202376all, 0x0000da0c39a54804ll,
+ 0x00008d3dcb08d3ddll, 0x0000dba4a47aa996ll, 0x00008ca29c046515ll, 0x0000dd3b4d9cf24bll,
+ 0x00008c08c08c08c1ll, 0x0000ded038e633f3ll, 0x00008b70344a139cll, 0x0000e0636a23e2eell,
+ 0x00008ad8f2fba939ll, 0x0000e1f4e5170d02ll, 0x00008a42f870566all, 0x0000e384ad748f0ell,
+ 0x000089ae4089ae41ll, 0x0000e512c6e54998ll, 0x0000891ac73ae982ll, 0x0000e69f35065448ll,
+ 0x0000888888888889ll, 0x0000e829fb693044ll, 0x000087f78087f781ll, 0x0000e9b31d93f98ell,
+ 0x00008767ab5f34e5ll, 0x0000eb3a9f019750ll, 0x000086d905447a35ll, 0x0000ecc08321eb30ll,
+ 0x0000864b8a7de6d2ll, 0x0000ee44cd59ffabll, 0x000085bf37612cefll, 0x0000efc781043579ll,
+ 0x0000853408534086ll, 0x0000f148a170700all, 0x000084a9f9c8084bll, 0x0000f2c831e44116ll,
+ 0x0000842108421085ll, 0x0000f446359b1353ll, 0x0000839930523fbfll, 0x0000f5c2afc65447ll,
+ 0x000083126e978d50ll, 0x0000f73da38d9d4all, 0x0000828cbfbeb9a1ll, 0x0000f8b7140edbb1ll,
+ 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll,
+ 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll,
+ 0x0000800000000000ll, 0x0000ffff00000000ll,
+ };
+
+
+ // LL_tbl[k] = 2^48*log2(1.0+k/2^15);
+static int64_t __LL_tbl[256] = {
+ 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull,
+ 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull,
+ 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull,
+ 0x00000023e5bbb2b2ull, 0x00000026c81c83e4ull, 0x00000029aa7790f0ull, 0x0000002c8cccd9edull,
+ 0x0000002f6f1c5ef2ull, 0x0000003251662017ull, 0x0000003533aa1d71ull, 0x0000003815e8571aull,
+ 0x0000003af820cd26ull, 0x0000003dda537faeull, 0x00000040bc806ec8ull, 0x000000439ea79a8cull,
+ 0x0000004680c90310ull, 0x0000004962e4a86cull, 0x0000004c44fa8ab6ull, 0x0000004f270aaa06ull,
+ 0x0000005209150672ull, 0x00000054eb19a013ull, 0x00000057cd1876fdull, 0x0000005aaf118b4aull,
+ 0x0000005d9104dd0full, 0x0000006072f26c64ull, 0x0000006354da3960ull, 0x0000006636bc441aull,
+ 0x0000006918988ca8ull, 0x0000006bfa6f1322ull, 0x0000006edc3fd79full, 0x00000071be0ada35ull,
+ 0x000000749fd01afdull, 0x00000077818f9a0cull, 0x0000007a6349577aull, 0x0000007d44fd535eull,
+ 0x0000008026ab8dceull, 0x00000083085406e3ull, 0x00000085e9f6beb2ull, 0x00000088cb93b552ull,
+ 0x0000008bad2aeadcull, 0x0000008e8ebc5f65ull, 0x0000009170481305ull, 0x0000009451ce05d3ull,
+ 0x00000097334e37e5ull, 0x0000009a14c8a953ull, 0x0000009cf63d5a33ull, 0x0000009fd7ac4a9dull,
+ 0x000000a2b07f3458ull, 0x000000a59a78ea6aull, 0x000000a87bd699fbull, 0x000000ab5d2e8970ull,
+ 0x000000ae3e80b8e3ull, 0x000000b11fcd2869ull, 0x000000b40113d818ull, 0x000000b6e254c80aull,
+ 0x000000b9c38ff853ull, 0x000000bca4c5690cull, 0x000000bf85f51a4aull, 0x000000c2671f0c26ull,
+ 0x000000c548433eb6ull, 0x000000c82961b211ull, 0x000000cb0a7a664dull, 0x000000cdeb8d5b82ull,
+ 0x000000d0cc9a91c8ull, 0x000000d3ada20933ull, 0x000000d68ea3c1ddull, 0x000000d96f9fbbdbull,
+ 0x000000dc5095f744ull, 0x000000df31867430ull, 0x000000e2127132b5ull, 0x000000e4f35632eaull,
+ 0x000000e7d43574e6ull, 0x000000eab50ef8c1ull, 0x000000ed95e2be90ull, 0x000000f076b0c66cull,
+ 0x000000f35779106aull, 0x000000f6383b9ca2ull, 0x000000f918f86b2aull, 0x000000fbf9af7c1aull,
+ 0x000000feda60cf88ull, 0x00000101bb0c658cull, 0x000001049bb23e3cull, 0x000001077c5259afull,
+ 0x0000010a5cecb7fcull, 0x0000010d3d81593aull, 0x000001101e103d7full, 0x00000112fe9964e4ull,
+ 0x00000115df1ccf7eull, 0x00000118bf9a7d64ull, 0x0000011ba0126eadull, 0x0000011e8084a371ull,
+ 0x0000012160f11bc6ull, 0x000001244157d7c3ull, 0x0000012721b8d77full, 0x0000012a02141b10ull,
+ 0x0000012ce269a28eull, 0x0000012fc2b96e0full, 0x00000132a3037daaull, 0x000001358347d177ull,
+ 0x000001386386698cull, 0x0000013b43bf45ffull, 0x0000013e23f266e9ull, 0x00000141041fcc5eull,
+ 0x00000143e4477678ull, 0x00000146c469654bull, 0x00000149a48598f0ull, 0x0000014c849c117cull,
+ 0x0000014f64accf08ull, 0x0000015244b7d1a9ull, 0x0000015524bd1976ull, 0x0000015804bca687ull,
+ 0x0000015ae4b678f2ull, 0x0000015dc4aa90ceull, 0x00000160a498ee31ull, 0x0000016384819134ull,
+ 0x00000166646479ecull, 0x000001694441a870ull, 0x0000016c24191cd7ull, 0x0000016df6ca19bdull,
+ 0x00000171e3b6d7aaull, 0x00000174c37d1e44ull, 0x00000177a33dab1cull, 0x0000017a82f87e49ull,
+ 0x0000017d62ad97e2ull, 0x00000180425cf7feull, 0x00000182b07f3458ull, 0x0000018601aa8c19ull,
+ 0x00000188e148c046ull, 0x0000018bc0e13b52ull, 0x0000018ea073fd52ull, 0x000001918001065dull,
+ 0x000001945f88568bull, 0x000001973f09edf2ull, 0x0000019a1e85ccaaull, 0x0000019cfdfbf2c8ull,
+ 0x0000019fdd6c6063ull, 0x000001a2bcd71593ull, 0x000001a59c3c126eull, 0x000001a87b9b570bull,
+ 0x000001ab5af4e380ull, 0x000001ae3a48b7e5ull, 0x000001b11996d450ull, 0x000001b3f8df38d9ull,
+ 0x000001b6d821e595ull, 0x000001b9b75eda9bull, 0x000001bc96961803ull, 0x000001bf75c79de3ull,
+ 0x000001c254f36c51ull, 0x000001c534198365ull, 0x000001c81339e336ull, 0x000001caf2548bd9ull,
+ 0x000001cdd1697d67ull, 0x000001d0b078b7f5ull, 0x000001d38f823b9aull, 0x000001d66e86086dull,
+ 0x000001d94d841e86ull, 0x000001dc2c7c7df9ull, 0x000001df0b6f26dfull, 0x000001e1ea5c194eull,
+ 0x000001e4c943555dull, 0x000001e7a824db23ull, 0x000001ea8700aab5ull, 0x000001ed65d6c42bull,
+ 0x000001f044a7279dull, 0x000001f32371d51full, 0x000001f60236cccaull, 0x000001f8e0f60eb3ull,
+ 0x000001fbbfaf9af3ull, 0x000001fe9e63719eull, 0x000002017d1192ccull, 0x000002045bb9fe94ull,
+ 0x000002073a5cb50dull, 0x00000209c06e6212ull, 0x0000020cf791026aull, 0x0000020fd622997cull,
+ 0x00000212b07f3458ull, 0x000002159334a8d8ull, 0x0000021871b52150ull, 0x0000021b502fe517ull,
+ 0x0000021d6a73a78full, 0x000002210d144eeeull, 0x00000223eb7df52cull, 0x00000226c9e1e713ull,
+ 0x00000229a84024bbull, 0x0000022c23679b4eull, 0x0000022f64eb83a8ull, 0x000002324338a51bull,
+ 0x00000235218012a9ull, 0x00000237ffc1cc69ull, 0x0000023a2c3b0ea4ull, 0x0000023d13ee805bull,
+ 0x0000024035e9221full, 0x00000243788faf25ull, 0x0000024656b4e735ull, 0x00000247ed646bfeull,
+ 0x0000024c12ee3d98ull, 0x0000024ef1025c1aull, 0x00000251cf10c799ull, 0x0000025492644d65ull,
+ 0x000002578b1c85eeull, 0x0000025a6919d8f0ull, 0x0000025d13ee805bull, 0x0000026025036716ull,
+ 0x0000026296453882ull, 0x00000265e0d62b53ull, 0x00000268beb701f3ull, 0x0000026b9c92265eull,
+ 0x0000026d32f798a9ull, 0x00000271583758ebull, 0x000002743601673bull, 0x0000027713c5c3b0ull,
+ 0x00000279f1846e5full, 0x0000027ccf3d6761ull, 0x0000027e6580aecbull, 0x000002828a9e44b3ull,
+ 0x0000028568462932ull, 0x00000287bdbf5255ull, 0x0000028b2384de4aull, 0x0000028d13ee805bull,
+ 0x0000029035e9221full, 0x0000029296453882ull, 0x0000029699bdfb61ull, 0x0000029902a37aabull,
+ 0x0000029c54b864c9ull, 0x0000029deabd1083ull, 0x000002a20f9c0bb5ull, 0x000002a4c7605d61ull,
+ 0x000002a7bdbf5255ull, 0x000002a96056dafcull, 0x000002ac3daf14efull, 0x000002af1b019ecaull,
+ 0x000002b296453882ull, 0x000002b5d022d80full, 0x000002b8fa471cb3ull, 0x000002ba9012e713ull,
+ 0x000002bd6d4901ccull, 0x000002c04a796cf6ull, 0x000002c327a428a6ull, 0x000002c61a5e8f4cull,
+ 0x000002c8e1e891f6ull, 0x000002cbbf023fc2ull, 0x000002ce9c163e6eull, 0x000002d179248e13ull,
+ 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull,
+};
+
+
+
+
+#endif
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index a1ef53c04415..5b47736d27d9 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -20,7 +20,7 @@
#include <linux/crush/crush.h>
#include <linux/crush/hash.h>
-#include <linux/crush/mapper.h>
+#include "crush_ln_table.h"
/*
* Implement the core CRUSH mapping algorithm.
@@ -238,6 +238,102 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
return bucket->h.items[high];
}
+// compute 2^44*log2(input+1)
+uint64_t crush_ln(unsigned xin)
+{
+ unsigned x=xin, x1;
+ int iexpon, index1, index2;
+ uint64_t RH, LH, LL, xl64, result;
+
+ x++;
+
+ // normalize input
+ iexpon = 15;
+ while(!(x&0x18000)) { x<<=1; iexpon--; }
+
+ index1 = (x>>8)<<1;
+ // RH ~ 2^56/index1
+ RH = __RH_LH_tbl[index1 - 256];
+ // LH ~ 2^48 * log2(index1/256)
+ LH = __RH_LH_tbl[index1 + 1 - 256];
+
+ // RH*x ~ 2^48 * (2^15 + xf), xf<2^8
+ xl64 = (int64_t)x * RH;
+ xl64 >>= 48;
+ x1 = xl64;
+
+ result = iexpon;
+ result <<= (12 + 32);
+
+ index2 = x1 & 0xff;
+ // LL ~ 2^48*log2(1.0+index2/2^15)
+ LL = __LL_tbl[index2];
+
+ LH = LH + LL;
+
+ LH >>= (48-12 - 32);
+ result += LH;
+
+ return result;
+}
+
+
+/*
+ * straw2
+ *
+ * for reference, see:
+ *
+ * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
+ *
+ */
+
+static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
+ int x, int r)
+{
+ unsigned i, high = 0;
+ unsigned u;
+ unsigned w;
+ __s64 ln, draw, high_draw = 0;
+
+ for (i = 0; i < bucket->h.size; i++) {
+ w = bucket->item_weights[i];
+ if (w) {
+ u = crush_hash32_3(bucket->h.hash, x,
+ bucket->h.items[i], r);
+ u &= 0xffff;
+
+ /*
+ * for some reason slightly less than 0x10000 produces
+ * a slightly more accurate distribution... probably a
+ * rounding effect.
+ *
+ * the natural log lookup table maps [0,0xffff]
+ * (corresponding to real numbers [1/0x10000, 1] to
+ * [0, 0xffffffffffff] (corresponding to real numbers
+ * [-11.090355,0]).
+ */
+ ln = crush_ln(u) - 0x1000000000000ll;
+
+ /*
+ * divide by 16.16 fixed-point weight. note
+ * that the ln value is negative, so a larger
+ * weight means a larger (less negative) value
+ * for draw.
+ */
+ draw = div64_s64(ln, w);
+ } else {
+ draw = S64_MIN;
+ }
+
+ if (i == 0 || draw > high_draw) {
+ high = i;
+ high_draw = draw;
+ }
+ }
+ return bucket->h.items[high];
+}
+
+
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
@@ -255,12 +351,16 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
case CRUSH_BUCKET_STRAW:
return bucket_straw_choose((struct crush_bucket_straw *)in,
x, r);
+ case CRUSH_BUCKET_STRAW2:
+ return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
+ x, r);
default:
dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
return in->items[0];
}
}
+
/*
* true if device is marked "out" (failed, fully offloaded)
* of the cluster
@@ -290,6 +390,7 @@ static int is_out(const struct crush_map *map,
* @type: the type of item to choose
* @out: pointer to output vector
* @outpos: our position in that vector
+ * @out_size: size of the out vector
* @tries: number of attempts to make
* @recurse_tries: number of attempts to have recursive chooseleaf make
* @local_retries: localized retries
@@ -304,6 +405,7 @@ static int crush_choose_firstn(const struct crush_map *map,
const __u32 *weight, int weight_max,
int x, int numrep, int type,
int *out, int outpos,
+ int out_size,
unsigned int tries,
unsigned int recurse_tries,
unsigned int local_retries,
@@ -322,6 +424,7 @@ static int crush_choose_firstn(const struct crush_map *map,
int item = 0;
int itemtype;
int collide, reject;
+ int count = out_size;
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
recurse_to_leaf ? "_LEAF" : "",
@@ -329,7 +432,7 @@ static int crush_choose_firstn(const struct crush_map *map,
tries, recurse_tries, local_retries, local_fallback_retries,
parent_r);
- for (rep = outpos; rep < numrep; rep++) {
+ for (rep = outpos; rep < numrep && count > 0 ; rep++) {
/* keep trying until we get a non-out, non-colliding item */
ftotal = 0;
skip_rep = 0;
@@ -403,7 +506,7 @@ static int crush_choose_firstn(const struct crush_map *map,
map->buckets[-1-item],
weight, weight_max,
x, outpos+1, 0,
- out2, outpos,
+ out2, outpos, count,
recurse_tries, 0,
local_retries,
local_fallback_retries,
@@ -463,6 +566,7 @@ reject:
dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
+ count--;
}
dprintk("CHOOSE returns %d\n", outpos);
@@ -654,6 +758,7 @@ int crush_do_rule(const struct crush_map *map,
__u32 step;
int i, j;
int numrep;
+ int out_size;
/*
* the original choose_total_tries value was off by one (it
* counted "retries" and not "tries"). add one.
@@ -761,6 +866,7 @@ int crush_do_rule(const struct crush_map *map,
x, numrep,
curstep->arg2,
o+osize, j,
+ result_max-osize,
choose_tries,
recurse_tries,
choose_local_retries,
@@ -770,11 +876,13 @@ int crush_do_rule(const struct crush_map *map,
c+osize,
0);
} else {
+ out_size = ((numrep < (result_max-osize)) ?
+ numrep : (result_max-osize));
crush_choose_indep(
map,
map->buckets[-1-w[i]],
weight, weight_max,
- x, numrep, numrep,
+ x, out_size, numrep,
curstep->arg2,
o+osize, j,
choose_tries,
@@ -783,7 +891,7 @@ int crush_do_rule(const struct crush_map *map,
recurse_to_leaf,
c+osize,
0);
- osize += numrep;
+ osize += out_size;
}
}
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 14d9995097cc..593dc2eabcc8 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -22,6 +22,7 @@
* .../monmap - current monmap
* .../osdc - active osd requests
* .../monc - mon client state
+ * .../client_options - libceph-only (i.e. not rbd or cephfs) options
* .../dentry_lru - dump contents of dentry lru
* .../caps - expose cap (reservation) stats
* .../bdi - symlink to ../../bdi/something
@@ -177,10 +178,24 @@ static int osdc_show(struct seq_file *s, void *pp)
return 0;
}
+static int client_options_show(struct seq_file *s, void *p)
+{
+ struct ceph_client *client = s->private;
+ int ret;
+
+ ret = ceph_print_client_options(s, client);
+ if (ret)
+ return ret;
+
+ seq_putc(s, '\n');
+ return 0;
+}
+
CEPH_DEFINE_SHOW_FUNC(monmap_show)
CEPH_DEFINE_SHOW_FUNC(osdmap_show)
CEPH_DEFINE_SHOW_FUNC(monc_show)
CEPH_DEFINE_SHOW_FUNC(osdc_show)
+CEPH_DEFINE_SHOW_FUNC(client_options_show)
int ceph_debugfs_init(void)
{
@@ -242,6 +257,14 @@ int ceph_debugfs_client_init(struct ceph_client *client)
if (!client->debugfs_osdmap)
goto out;
+ client->debugfs_options = debugfs_create_file("client_options",
+ 0600,
+ client->debugfs_dir,
+ client,
+ &client_options_show_fops);
+ if (!client->debugfs_options)
+ goto out;
+
return 0;
out:
@@ -252,6 +275,7 @@ out:
void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
dout("ceph_debugfs_client_cleanup %p\n", client);
+ debugfs_remove(client->debugfs_options);
debugfs_remove(client->debugfs_osdmap);
debugfs_remove(client->debugfs_monmap);
debugfs_remove(client->osdc.debugfs_file);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 6b3f54ed65ba..967080a9f043 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -484,7 +484,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
IPPROTO_TCP, &sock);
if (ret)
return ret;
- sock->sk->sk_allocation = GFP_NOFS | __GFP_MEMALLOC;
+ sock->sk->sk_allocation = GFP_NOFS;
#ifdef CONFIG_LOCKDEP
lockdep_set_class(&sock->sk->sk_lock, &socket_class);
@@ -505,8 +505,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
pr_err("connect %s error %d\n",
ceph_pr_addr(&con->peer_addr.in_addr), ret);
sock_release(sock);
- con->error_msg = "connect error";
-
return ret;
}
@@ -520,8 +518,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
ret);
}
- sk_set_memalloc(sock->sk);
-
con->sock = sock;
return 0;
}
@@ -2147,12 +2143,10 @@ static int process_connect(struct ceph_connection *con)
* to WAIT. This shouldn't happen if we are the
* client.
*/
- pr_err("process_connect got WAIT as client\n");
con->error_msg = "protocol error, got WAIT as client";
return -1;
default:
- pr_err("connect protocol error, will retry\n");
con->error_msg = "protocol error, garbage tag during connect";
return -1;
}
@@ -2284,8 +2278,7 @@ static int read_partial_message(struct ceph_connection *con)
crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
if (cpu_to_le32(crc) != con->in_hdr.crc) {
- pr_err("read_partial_message bad hdr "
- " crc %u != expected %u\n",
+ pr_err("read_partial_message bad hdr crc %u != expected %u\n",
crc, con->in_hdr.crc);
return -EBADMSG;
}
@@ -2315,7 +2308,7 @@ static int read_partial_message(struct ceph_connection *con)
pr_err("read_partial_message bad seq %lld expected %lld\n",
seq, con->in_seq + 1);
con->error_msg = "bad message sequence # for incoming message";
- return -EBADMSG;
+ return -EBADE;
}
/* allocate message? */
@@ -2662,6 +2655,8 @@ more:
switch (ret) {
case -EBADMSG:
con->error_msg = "bad crc";
+ /* fall through */
+ case -EBADE:
ret = -EIO;
break;
case -EIO:
@@ -2808,11 +2803,8 @@ static void con_work(struct work_struct *work)
{
struct ceph_connection *con = container_of(work, struct ceph_connection,
work.work);
- unsigned long pflags = current->flags;
bool fault;
- current->flags |= PF_MEMALLOC;
-
mutex_lock(&con->mutex);
while (true) {
int ret;
@@ -2843,7 +2835,8 @@ static void con_work(struct work_struct *work)
if (ret < 0) {
if (ret == -EAGAIN)
continue;
- con->error_msg = "socket error on read";
+ if (!con->error_msg)
+ con->error_msg = "socket error on read";
fault = true;
break;
}
@@ -2852,7 +2845,8 @@ static void con_work(struct work_struct *work)
if (ret < 0) {
if (ret == -EAGAIN)
continue;
- con->error_msg = "socket error on write";
+ if (!con->error_msg)
+ con->error_msg = "socket error on write";
fault = true;
}
@@ -2866,8 +2860,6 @@ static void con_work(struct work_struct *work)
con_fault_finish(con);
con->ops->put(con);
-
- tsk_restore_flags(current, pflags, PF_MEMALLOC);
}
/*
@@ -2876,11 +2868,13 @@ static void con_work(struct work_struct *work)
*/
static void con_fault(struct ceph_connection *con)
{
- pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
- ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
dout("fault %p state %lu to peer %s\n",
con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
+ pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+ con->error_msg = NULL;
+
WARN_ON(con->state != CON_STATE_CONNECTING &&
con->state != CON_STATE_NEGOTIATING &&
con->state != CON_STATE_OPEN);
@@ -3302,8 +3296,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
*/
if (*skip)
return 0;
- con->error_msg = "error allocating memory for incoming message";
+ con->error_msg = "error allocating memory for incoming message";
return -ENOMEM;
}
memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index b8c3fde5b04f..15796696d64e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -122,6 +122,22 @@ bad:
return -EINVAL;
}
+static int crush_decode_straw2_bucket(void **p, void *end,
+ struct crush_bucket_straw2 *b)
+{
+ int j;
+ dout("crush_decode_straw2_bucket %p to %p\n", *p, end);
+ b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
+ if (b->item_weights == NULL)
+ return -ENOMEM;
+ ceph_decode_need(p, end, b->h.size * sizeof(u32), bad);
+ for (j = 0; j < b->h.size; j++)
+ b->item_weights[j] = ceph_decode_32(p);
+ return 0;
+bad:
+ return -EINVAL;
+}
+
static int skip_name_map(void **p, void *end)
{
int len;
@@ -204,6 +220,9 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
case CRUSH_BUCKET_STRAW:
size = sizeof(struct crush_bucket_straw);
break;
+ case CRUSH_BUCKET_STRAW2:
+ size = sizeof(struct crush_bucket_straw2);
+ break;
default:
err = -EINVAL;
goto bad;
@@ -261,6 +280,12 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
if (err < 0)
goto bad;
break;
+ case CRUSH_BUCKET_STRAW2:
+ err = crush_decode_straw2_bucket(p, end,
+ (struct crush_bucket_straw2 *)b);
+ if (err < 0)
+ goto bad;
+ break;
}
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 3b3965288f52..1796cef55ab5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1630,6 +1630,22 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);
+#ifdef CONFIG_NET_CLS_ACT
+static struct static_key ingress_needed __read_mostly;
+
+void net_inc_ingress_queue(void)
+{
+ static_key_slow_inc(&ingress_needed);
+}
+EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
+
+void net_dec_ingress_queue(void)
+{
+ static_key_slow_dec(&ingress_needed);
+}
+EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
+#endif
+
static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
/* We are not allowed to call static_key_slow_dec() from irq context
@@ -2697,7 +2713,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
if (unlikely(!skb))
goto out_null;
- if (netif_needs_gso(dev, skb, features)) {
+ if (netif_needs_gso(skb, features)) {
struct sk_buff *segs;
segs = skb_gso_segment(skb, features);
@@ -2879,7 +2895,7 @@ EXPORT_SYMBOL(xmit_recursion);
* dev_loopback_xmit - loop back @skb
* @skb: buffer to transmit
*/
-int dev_loopback_xmit(struct sk_buff *skb)
+int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
{
skb_reset_mac_header(skb);
__skb_pull(skb, skb_network_offset(skb));
@@ -3017,11 +3033,11 @@ out:
return rc;
}
-int dev_queue_xmit(struct sk_buff *skb)
+int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb)
{
return __dev_queue_xmit(skb, NULL);
}
-EXPORT_SYMBOL(dev_queue_xmit);
+EXPORT_SYMBOL(dev_queue_xmit_sk);
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
{
@@ -3547,7 +3563,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
- goto out;
+ return skb;
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
@@ -3561,8 +3577,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
return NULL;
}
-out:
- skb->tc_verd = 0;
return skb;
}
#endif
@@ -3698,12 +3712,15 @@ another_round:
skip_taps:
#ifdef CONFIG_NET_CLS_ACT
- skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
- if (!skb)
- goto unlock;
+ if (static_key_false(&ingress_needed)) {
+ skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+ if (!skb)
+ goto unlock;
+ }
+
+ skb->tc_verd = 0;
ncls:
#endif
-
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
@@ -3853,13 +3870,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
* NET_RX_SUCCESS: no congestion
* NET_RX_DROP: packet was dropped
*/
-int netif_receive_skb(struct sk_buff *skb)
+int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb)
{
trace_netif_receive_skb_entry(skb);
return netif_receive_skb_internal(skb);
}
-EXPORT_SYMBOL(netif_receive_skb);
+EXPORT_SYMBOL(netif_receive_skb_sk);
/* Network device is going away, flush any packets still pending
* Called with irqs disabled.
diff --git a/net/core/filter.c b/net/core/filter.c
index b669e75d2b36..bf831a85c315 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1175,12 +1175,27 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
return 0;
}
+/**
+ * bpf_skb_clone_not_writable - is the header of a clone not writable
+ * @skb: buffer to check
+ * @len: length up to which to write, can be negative
+ *
+ * Returns true if modifying the header part of the cloned buffer
+ * does require the data to be copied. I.e. this version works with
+ * negative lengths needed for eBPF case!
+ */
+static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len)
+{
+ return skb_header_cloned(skb) ||
+ (int) skb_headroom(skb) + len > skb->hdr_len;
+}
+
#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
- unsigned int offset = (unsigned int) r2;
+ int offset = (int) r2;
void *from = (void *) (long) r3;
unsigned int len = (unsigned int) r4;
char buf[16];
@@ -1194,10 +1209,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
*
* so check for invalid 'offset' and too large 'len'
*/
- if (unlikely(offset > 0xffff || len > sizeof(buf)))
+ if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
return -EFAULT;
- if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
+ offset -= skb->data - skb_mac_header(skb);
+ if (unlikely(skb_cloned(skb) &&
+ bpf_skb_clone_unwritable(skb, offset + len)))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, len, buf);
@@ -1232,15 +1249,18 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = {
#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f)
#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10)
-static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ int offset = (int) r2;
__sum16 sum, *ptr;
- if (unlikely(offset > 0xffff))
+ if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+ offset -= skb->data - skb_mac_header(skb);
+ if (unlikely(skb_cloned(skb) &&
+ bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1276,16 +1296,19 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = {
.arg5_type = ARG_ANYTHING,
};
-static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+ int offset = (int) r2;
__sum16 sum, *ptr;
- if (unlikely(offset > 0xffff))
+ if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+ offset -= skb->data - skb_mac_header(skb);
+ if (unlikely(skb_cloned(skb) &&
+ bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e7345d9031df..78fc04ad36fc 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -16,7 +16,6 @@
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/net_namespace.h>
-#include <linux/rtnetlink.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
@@ -148,9 +147,11 @@ static void ops_free_list(const struct pernet_operations *ops,
}
}
+static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
+ int id);
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
- int min = 0, max = 0;
+ int min = 0, max = 0, id;
ASSERT_RTNL();
@@ -159,7 +160,11 @@ static int alloc_netid(struct net *net, struct net *peer, int reqid)
max = reqid + 1;
}
- return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
+ id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
+ if (id >= 0)
+ rtnl_net_notifyid(net, peer, RTM_NEWNSID, id);
+
+ return id;
}
/* This function is used by idr_for_each(). If net is equal to peer, the
@@ -359,8 +364,10 @@ static void cleanup_net(struct work_struct *work)
for_each_net(tmp) {
int id = __peernet2id(tmp, net, false);
- if (id >= 0)
+ if (id >= 0) {
+ rtnl_net_notifyid(tmp, net, RTM_DELNSID, id);
idr_remove(&tmp->netns_ids, id);
+ }
}
idr_destroy(&net->netns_ids);
@@ -531,7 +538,8 @@ static int rtnl_net_get_size(void)
}
static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
- int cmd, struct net *net, struct net *peer)
+ int cmd, struct net *net, struct net *peer,
+ int nsid)
{
struct nlmsghdr *nlh;
struct rtgenmsg *rth;
@@ -546,9 +554,13 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
rth = nlmsg_data(nlh);
rth->rtgen_family = AF_UNSPEC;
- id = __peernet2id(net, peer, false);
- if (id < 0)
- id = NETNSA_NSID_NOT_ASSIGNED;
+ if (nsid >= 0) {
+ id = nsid;
+ } else {
+ id = __peernet2id(net, peer, false);
+ if (id < 0)
+ id = NETNSA_NSID_NOT_ASSIGNED;
+ }
if (nla_put_s32(skb, NETNSA_NSID, id))
goto nla_put_failure;
@@ -565,8 +577,8 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
struct sk_buff *msg;
- int err = -ENOBUFS;
struct net *peer;
+ int err;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
rtnl_net_policy);
@@ -589,7 +601,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
}
err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
- RTM_GETNSID, net, peer);
+ RTM_GETNSID, net, peer, -1);
if (err < 0)
goto err_out;
@@ -603,6 +615,75 @@ out:
return err;
}
+struct rtnl_net_dump_cb {
+ struct net *net;
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+ int idx;
+ int s_idx;
+};
+
+static int rtnl_net_dumpid_one(int id, void *peer, void *data)
+{
+ struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
+ int ret;
+
+ if (net_cb->idx < net_cb->s_idx)
+ goto cont;
+
+ ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
+ net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWNSID, net_cb->net, peer, id);
+ if (ret < 0)
+ return ret;
+
+cont:
+ net_cb->idx++;
+ return 0;
+}
+
+static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct rtnl_net_dump_cb net_cb = {
+ .net = net,
+ .skb = skb,
+ .cb = cb,
+ .idx = 0,
+ .s_idx = cb->args[0],
+ };
+
+ ASSERT_RTNL();
+
+ idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
+
+ cb->args[0] = net_cb.idx;
+ return skb->len;
+}
+
+static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
+ int id)
+{
+ struct sk_buff *msg;
+ int err = -ENOMEM;
+
+ msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
+ if (!msg)
+ goto out;
+
+ err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id);
+ if (err < 0)
+ goto err_out;
+
+ rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
+ return;
+
+err_out:
+ nlmsg_free(msg);
+out:
+ rtnl_set_sk_err(net, RTNLGRP_NSID, err);
+}
+
static int __init net_ns_init(void)
{
struct net_generic *ng;
@@ -637,7 +718,8 @@ static int __init net_ns_init(void)
register_pernet_subsys(&net_ns_ops);
rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
+ NULL);
return 0;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5e02260b087f..358d52a38533 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -818,7 +818,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
nla_total_size(sizeof(struct ifla_vf_vlan)) +
nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
nla_total_size(sizeof(struct ifla_vf_rate)) +
- nla_total_size(sizeof(struct ifla_vf_link_state)));
+ nla_total_size(sizeof(struct ifla_vf_link_state)) +
+ nla_total_size(sizeof(struct ifla_vf_rss_query_en)));
return size;
} else
return 0;
@@ -1132,14 +1133,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct ifla_vf_tx_rate vf_tx_rate;
struct ifla_vf_spoofchk vf_spoofchk;
struct ifla_vf_link_state vf_linkstate;
+ struct ifla_vf_rss_query_en vf_rss_query_en;
/*
* Not all SR-IOV capable drivers support the
- * spoofcheck query. Preset to -1 so the user
- * space tool can detect that the driver didn't
- * report anything.
+ * spoofcheck and "RSS query enable" query. Preset to
+ * -1 so the user space tool can detect that the driver
+ * didn't report anything.
*/
ivi.spoofchk = -1;
+ ivi.rss_query_en = -1;
memset(ivi.mac, 0, sizeof(ivi.mac));
/* The default value for VF link state is "auto"
* IFLA_VF_LINK_STATE_AUTO which equals zero
@@ -1152,7 +1155,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
vf_rate.vf =
vf_tx_rate.vf =
vf_spoofchk.vf =
- vf_linkstate.vf = ivi.vf;
+ vf_linkstate.vf =
+ vf_rss_query_en.vf = ivi.vf;
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
vf_vlan.vlan = ivi.vlan;
@@ -1162,6 +1166,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
vf_rate.max_tx_rate = ivi.max_tx_rate;
vf_spoofchk.setting = ivi.spoofchk;
vf_linkstate.link_state = ivi.linkstate;
+ vf_rss_query_en.setting = ivi.rss_query_en;
vf = nla_nest_start(skb, IFLA_VF_INFO);
if (!vf) {
nla_nest_cancel(skb, vfinfo);
@@ -1176,7 +1181,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
&vf_spoofchk) ||
nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
- &vf_linkstate))
+ &vf_linkstate) ||
+ nla_put(skb, IFLA_VF_RSS_QUERY_EN,
+ sizeof(vf_rss_query_en),
+ &vf_rss_query_en))
goto nla_put_failure;
nla_nest_end(skb, vf);
}
@@ -1290,6 +1298,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) },
[IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) },
[IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) },
+ [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) },
};
static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1500,6 +1509,17 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
ivl->link_state);
break;
}
+ case IFLA_VF_RSS_QUERY_EN: {
+ struct ifla_vf_rss_query_en *ivrssq_en;
+
+ ivrssq_en = nla_data(vf);
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rss_query_en)
+ err = ops->ndo_set_vf_rss_query_en(dev,
+ ivrssq_en->vf,
+ ivrssq_en->setting);
+ break;
+ }
default:
err = -EINVAL;
break;
@@ -1932,7 +1952,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
EXPORT_SYMBOL(rtnl_configure_link);
struct net_device *rtnl_create_link(struct net *net,
- char *ifname, unsigned char name_assign_type,
+ const char *ifname, unsigned char name_assign_type,
const struct rtnl_link_ops *ops, struct nlattr *tb[])
{
int err;
@@ -2404,7 +2424,7 @@ EXPORT_SYMBOL(rtmsg_ifinfo);
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
- u8 *addr, u32 pid, u32 seq,
+ u8 *addr, u16 vid, u32 pid, u32 seq,
int type, unsigned int flags,
int nlflags)
{
@@ -2426,6 +2446,9 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr))
goto nla_put_failure;
+ if (vid)
+ if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid))
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -2440,7 +2463,7 @@ static inline size_t rtnl_fdb_nlmsg_size(void)
return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN);
}
-static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type)
+static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -2450,7 +2473,8 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type)
if (!skb)
goto errout;
- err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0);
+ err = nlmsg_populate_fdb_fill(skb, dev, addr, vid,
+ 0, 0, type, NTF_SELF, 0);
if (err < 0) {
kfree_skb(skb);
goto errout;
@@ -2585,7 +2609,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
nlh->nlmsg_flags);
if (!err) {
- rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH);
+ rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH);
ndm->ndm_flags &= ~NTF_SELF;
}
}
@@ -2686,7 +2710,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
if (!err) {
- rtnl_fdb_notify(dev, addr, RTM_DELNEIGH);
+ rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH);
ndm->ndm_flags &= ~NTF_SELF;
}
}
@@ -2711,7 +2735,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
if (*idx < cb->args[0])
goto skip;
- err = nlmsg_populate_fdb_fill(skb, dev, ha->addr,
+ err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0,
portid, seq,
RTM_NEWNEIGH, NTF_SELF,
NLM_F_MULTI);
@@ -2754,7 +2778,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net_device *dev;
struct nlattr *tb[IFLA_MAX+1];
- struct net_device *bdev = NULL;
struct net_device *br_dev = NULL;
const struct net_device_ops *ops = NULL;
const struct net_device_ops *cops = NULL;
@@ -2778,7 +2801,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
return -ENODEV;
ops = br_dev->netdev_ops;
- bdev = br_dev;
}
for_each_netdev(net, dev) {
@@ -2791,7 +2813,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
cops = br_dev->netdev_ops;
}
- bdev = dev;
} else {
if (dev != br_dev &&
!(dev->priv_flags & IFF_BRIDGE_PORT))
@@ -2801,7 +2822,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
!(dev->priv_flags & IFF_EBRIDGE))
continue;
- bdev = br_dev;
cops = ops;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cdb939b731aa..d1967dab9cc6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3752,7 +3752,6 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
-
/**
* skb_partial_csum_set - set up and verify partial csum values for packet
* @skb: the skb to set
@@ -4125,19 +4124,21 @@ EXPORT_SYMBOL(skb_try_coalesce);
*/
void skb_scrub_packet(struct sk_buff *skb, bool xnet)
{
- if (xnet)
- skb_orphan(skb);
skb->tstamp.tv64 = 0;
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
skb->ignore_df = 0;
skb_dst_drop(skb);
- skb->mark = 0;
skb_sender_cpu_clear(skb);
- skb_init_secmark(skb);
secpath_reset(skb);
nf_reset(skb);
nf_reset_trace(skb);
+
+ if (!xnet)
+ return;
+
+ skb_orphan(skb);
+ skb->mark = 0;
}
EXPORT_SYMBOL_GPL(skb_scrub_packet);
diff --git a/net/core/sock.c b/net/core/sock.c
index 654e38a99759..e891bcf325ca 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2799,8 +2799,7 @@ int proto_register(struct proto *prot, int alloc_slab)
kmem_cache_create(prot->twsk_prot->twsk_slab_name,
prot->twsk_prot->twsk_obj_size,
0,
- SLAB_HWCACHE_ALIGN |
- prot->slab_flags,
+ prot->slab_flags,
NULL);
if (prot->twsk_prot->twsk_slab == NULL)
goto out_free_timewait_sock_slab_name;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 332f7d6d9942..5f566663e47f 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -27,28 +27,16 @@
struct inet_timewait_death_row dccp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
- .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
- .death_lock = __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock),
.hashinfo = &dccp_hashinfo,
- .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
- (unsigned long)&dccp_death_row),
- .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work,
- inet_twdr_twkill_work),
-/* Short-time timewait calendar */
-
- .twcal_hand = -1,
- .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
- (unsigned long)&dccp_death_row),
};
EXPORT_SYMBOL_GPL(dccp_death_row);
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
- struct inet_timewait_sock *tw = NULL;
+ struct inet_timewait_sock *tw;
- if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets)
- tw = inet_twsk_alloc(sk, state);
+ tw = inet_twsk_alloc(sk, &dccp_death_row, state);
if (tw != NULL) {
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -71,8 +59,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
if (state == DCCP_TIME_WAIT)
timeo = DCCP_TIMEWAIT_LEN;
- inet_twsk_schedule(tw, &dccp_death_row, timeo,
- DCCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, timeo);
inet_twsk_put(tw);
} else {
/* Sorry, if we're out of memory, just CLOSE this
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index be1f08cdad29..4507b188fc51 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -194,7 +194,7 @@ static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb)
return err;
}
-static int dn_neigh_output_packet(struct sk_buff *skb)
+static int dn_neigh_output_packet(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *)dst;
@@ -206,7 +206,8 @@ static int dn_neigh_output_packet(struct sk_buff *skb)
/*
* For talking to broadcast devices: Ethernet & PPP
*/
-static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb)
+static int dn_long_output(struct neighbour *neigh, struct sock *sk,
+ struct sk_buff *skb)
{
struct net_device *dev = neigh->dev;
int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3;
@@ -245,14 +246,15 @@ static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb)
skb_reset_network_header(skb);
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL,
- neigh->dev, dn_neigh_output_packet);
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb,
+ NULL, neigh->dev, dn_neigh_output_packet);
}
/*
* For talking to pointopoint and multidrop devices: DDCMP and X.25
*/
-static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb)
+static int dn_short_output(struct neighbour *neigh, struct sock *sk,
+ struct sk_buff *skb)
{
struct net_device *dev = neigh->dev;
int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
@@ -284,8 +286,8 @@ static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb)
skb_reset_network_header(skb);
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL,
- neigh->dev, dn_neigh_output_packet);
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb,
+ NULL, neigh->dev, dn_neigh_output_packet);
}
/*
@@ -293,7 +295,8 @@ static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb)
* Phase 3 output is the same as short output, execpt that
* it clears the area bits before transmission.
*/
-static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb)
+static int dn_phase3_output(struct neighbour *neigh, struct sock *sk,
+ struct sk_buff *skb)
{
struct net_device *dev = neigh->dev;
int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
@@ -324,11 +327,11 @@ static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb)
skb_reset_network_header(skb);
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL,
- neigh->dev, dn_neigh_output_packet);
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb,
+ NULL, neigh->dev, dn_neigh_output_packet);
}
-int dn_to_neigh_output(struct sk_buff *skb)
+int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *) dst;
@@ -347,11 +350,11 @@ int dn_to_neigh_output(struct sk_buff *skb)
rcu_read_unlock();
if (dn->flags & DN_NDFLAG_P3)
- return dn_phase3_output(neigh, skb);
+ return dn_phase3_output(neigh, sk, skb);
if (use_long)
- return dn_long_output(neigh, skb);
+ return dn_long_output(neigh, sk, skb);
else
- return dn_short_output(neigh, skb);
+ return dn_short_output(neigh, sk, skb);
}
/*
@@ -372,7 +375,7 @@ void dn_neigh_pointopoint_hello(struct sk_buff *skb)
/*
* Ethernet router hello message received
*/
-int dn_neigh_router_hello(struct sk_buff *skb)
+int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb)
{
struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data;
@@ -434,7 +437,7 @@ int dn_neigh_router_hello(struct sk_buff *skb)
/*
* Endnode hello message received
*/
-int dn_neigh_endnode_hello(struct sk_buff *skb)
+int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb)
{
struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data;
struct neighbour *neigh;
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index fe5f01485d33..a321eac9fd0c 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -714,7 +714,7 @@ out:
return ret;
}
-static int dn_nsp_rx_packet(struct sk_buff *skb)
+static int dn_nsp_rx_packet(struct sock *sk2, struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
struct sock *sk = NULL;
@@ -814,7 +814,8 @@ free_out:
int dn_nsp_rx(struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, skb, skb->dev, NULL,
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, NULL, skb,
+ skb->dev, NULL,
dn_nsp_rx_packet);
}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 9ab0c4ba297f..03227ffd19ce 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -512,7 +512,7 @@ static int dn_return_long(struct sk_buff *skb)
*
* Returns: result of input function if route is found, error code otherwise
*/
-static int dn_route_rx_packet(struct sk_buff *skb)
+static int dn_route_rx_packet(struct sock *sk, struct sk_buff *skb)
{
struct dn_skb_cb *cb;
int err;
@@ -573,7 +573,8 @@ static int dn_route_rx_long(struct sk_buff *skb)
ptr++;
cb->hops = *ptr++; /* Visit Count */
- return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL,
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb,
+ skb->dev, NULL,
dn_route_rx_packet);
drop_it:
@@ -600,7 +601,8 @@ static int dn_route_rx_short(struct sk_buff *skb)
ptr += 2;
cb->hops = *ptr & 0x3f;
- return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL,
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb,
+ skb->dev, NULL,
dn_route_rx_packet);
drop_it:
@@ -608,7 +610,7 @@ drop_it:
return NET_RX_DROP;
}
-static int dn_route_discard(struct sk_buff *skb)
+static int dn_route_discard(struct sock *sk, struct sk_buff *skb)
{
/*
* I know we drop the packet here, but thats considered success in
@@ -618,7 +620,7 @@ static int dn_route_discard(struct sk_buff *skb)
return NET_RX_SUCCESS;
}
-static int dn_route_ptp_hello(struct sk_buff *skb)
+static int dn_route_ptp_hello(struct sock *sk, struct sk_buff *skb)
{
dn_dev_hello(skb);
dn_neigh_pointopoint_hello(skb);
@@ -704,22 +706,22 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
switch (flags & DN_RT_CNTL_MSK) {
case DN_RT_PKT_HELO:
return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- skb, skb->dev, NULL,
+ NULL, skb, skb->dev, NULL,
dn_route_ptp_hello);
case DN_RT_PKT_L1RT:
case DN_RT_PKT_L2RT:
return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE,
- skb, skb->dev, NULL,
+ NULL, skb, skb->dev, NULL,
dn_route_discard);
case DN_RT_PKT_ERTH:
return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- skb, skb->dev, NULL,
+ NULL, skb, skb->dev, NULL,
dn_neigh_router_hello);
case DN_RT_PKT_EEDH:
return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- skb, skb->dev, NULL,
+ NULL, skb, skb->dev, NULL,
dn_neigh_endnode_hello);
}
} else {
@@ -768,7 +770,8 @@ static int dn_output(struct sock *sk, struct sk_buff *skb)
cb->rt_flags |= DN_RT_F_IE;
cb->hops = 0;
- return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, skb, NULL, dev,
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, sk, skb,
+ NULL, dev,
dn_to_neigh_output);
error:
@@ -816,7 +819,8 @@ static int dn_forward(struct sk_buff *skb)
if (rt->rt_flags & RTCF_DOREDIRECT)
cb->rt_flags |= DN_RT_F_IE;
- return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, skb, dev, skb->dev,
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, NULL, skb,
+ dev, skb->dev,
dn_to_neigh_output);
drop:
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 5eaadabe23a1..079a224471e7 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -124,7 +124,7 @@ static ssize_t temp1_max_store(struct device *dev,
return count;
}
-static DEVICE_ATTR(temp1_max, S_IRUGO, temp1_max_show, temp1_max_store);
+static DEVICE_ATTR_RW(temp1_max);
static ssize_t temp1_max_alarm_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -159,8 +159,8 @@ static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj,
if (index == 1) {
if (!drv->get_temp_limit)
mode = 0;
- else if (drv->set_temp_limit)
- mode |= S_IWUSR;
+ else if (!drv->set_temp_limit)
+ mode &= ~S_IWUSR;
} else if (index == 2 && !drv->get_temp_alarm) {
mode = 0;
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c6e67aa46c32..933a92820d26 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -591,7 +591,8 @@ EXPORT_SYMBOL(arp_create);
void arp_xmit(struct sk_buff *skb)
{
/* Send it off, maybe filter it using firewalling first. */
- NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
+ NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, NULL, skb,
+ NULL, skb->dev, dev_queue_xmit_sk);
}
EXPORT_SYMBOL(arp_xmit);
@@ -625,7 +626,7 @@ EXPORT_SYMBOL(arp_send);
* Process an arp request.
*/
-static int arp_process(struct sk_buff *skb)
+static int arp_process(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -846,7 +847,7 @@ out:
static void parp_redo(struct sk_buff *skb)
{
- arp_process(skb);
+ arp_process(NULL, skb);
}
@@ -879,7 +880,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
- return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
+ return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, NULL, skb,
+ dev, NULL, arp_process);
consumeskb:
consume_skb(skb);
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index ff069f6597ac..34968cd5c146 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -16,14 +16,12 @@
#include <uapi/linux/fou.h>
#include <uapi/linux/genetlink.h>
-static DEFINE_SPINLOCK(fou_lock);
-static LIST_HEAD(fou_list);
-
struct fou {
struct socket *sock;
u8 protocol;
u8 flags;
- u16 port;
+ __be16 port;
+ u16 type;
struct udp_offload udp_offloads;
struct list_head list;
};
@@ -37,6 +35,13 @@ struct fou_cfg {
struct udp_port_cfg udp_config;
};
+static unsigned int fou_net_id;
+
+struct fou_net {
+ struct list_head fou_list;
+ struct mutex fou_lock;
+};
+
static inline struct fou *fou_from_sock(struct sock *sk)
{
return sk->sk_user_data;
@@ -387,20 +392,21 @@ out_unlock:
return err;
}
-static int fou_add_to_port_list(struct fou *fou)
+static int fou_add_to_port_list(struct net *net, struct fou *fou)
{
+ struct fou_net *fn = net_generic(net, fou_net_id);
struct fou *fout;
- spin_lock(&fou_lock);
- list_for_each_entry(fout, &fou_list, list) {
+ mutex_lock(&fn->fou_lock);
+ list_for_each_entry(fout, &fn->fou_list, list) {
if (fou->port == fout->port) {
- spin_unlock(&fou_lock);
+ mutex_unlock(&fn->fou_lock);
return -EALREADY;
}
}
- list_add(&fou->list, &fou_list);
- spin_unlock(&fou_lock);
+ list_add(&fou->list, &fn->fou_list);
+ mutex_unlock(&fn->fou_lock);
return 0;
}
@@ -410,14 +416,10 @@ static void fou_release(struct fou *fou)
struct socket *sock = fou->sock;
struct sock *sk = sock->sk;
- udp_del_offload(&fou->udp_offloads);
-
+ if (sk->sk_family == AF_INET)
+ udp_del_offload(&fou->udp_offloads);
list_del(&fou->list);
-
- /* Remove hooks into tunnel socket */
- sk->sk_user_data = NULL;
-
- sock_release(sock);
+ udp_tunnel_sock_release(sock);
kfree(fou);
}
@@ -447,10 +449,10 @@ static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
static int fou_create(struct net *net, struct fou_cfg *cfg,
struct socket **sockp)
{
- struct fou *fou = NULL;
- int err;
struct socket *sock = NULL;
+ struct fou *fou = NULL;
struct sock *sk;
+ int err;
/* Open UDP socket */
err = udp_sock_create(net, &cfg->udp_config, &sock);
@@ -486,6 +488,8 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
goto error;
}
+ fou->type = cfg->type;
+
udp_sk(sk)->encap_type = 1;
udp_encap_enable();
@@ -502,7 +506,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
goto error;
}
- err = fou_add_to_port_list(fou);
+ err = fou_add_to_port_list(net, fou);
if (err)
goto error;
@@ -514,27 +518,27 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
error:
kfree(fou);
if (sock)
- sock_release(sock);
+ udp_tunnel_sock_release(sock);
return err;
}
static int fou_destroy(struct net *net, struct fou_cfg *cfg)
{
- struct fou *fou;
- u16 port = cfg->udp_config.local_udp_port;
+ struct fou_net *fn = net_generic(net, fou_net_id);
+ __be16 port = cfg->udp_config.local_udp_port;
int err = -EINVAL;
+ struct fou *fou;
- spin_lock(&fou_lock);
- list_for_each_entry(fou, &fou_list, list) {
+ mutex_lock(&fn->fou_lock);
+ list_for_each_entry(fou, &fn->fou_list, list) {
if (fou->port == port) {
- udp_del_offload(&fou->udp_offloads);
fou_release(fou);
err = 0;
break;
}
}
- spin_unlock(&fou_lock);
+ mutex_unlock(&fn->fou_lock);
return err;
}
@@ -573,7 +577,7 @@ static int parse_nl_config(struct genl_info *info,
}
if (info->attrs[FOU_ATTR_PORT]) {
- u16 port = nla_get_u16(info->attrs[FOU_ATTR_PORT]);
+ __be16 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]);
cfg->udp_config.local_udp_port = port;
}
@@ -592,6 +596,7 @@ static int parse_nl_config(struct genl_info *info,
static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
{
+ struct net *net = genl_info_net(info);
struct fou_cfg cfg;
int err;
@@ -599,16 +604,119 @@ static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- return fou_create(&init_net, &cfg, NULL);
+ return fou_create(net, &cfg, NULL);
}
static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
{
+ struct net *net = genl_info_net(info);
struct fou_cfg cfg;
+ int err;
- parse_nl_config(info, &cfg);
+ err = parse_nl_config(info, &cfg);
+ if (err)
+ return err;
- return fou_destroy(&init_net, &cfg);
+ return fou_destroy(net, &cfg);
+}
+
+static int fou_fill_info(struct fou *fou, struct sk_buff *msg)
+{
+ if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) ||
+ nla_put_be16(msg, FOU_ATTR_PORT, fou->port) ||
+ nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) ||
+ nla_put_u8(msg, FOU_ATTR_TYPE, fou->type))
+ return -1;
+
+ if (fou->flags & FOU_F_REMCSUM_NOPARTIAL)
+ if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL))
+ return -1;
+ return 0;
+}
+
+static int fou_dump_info(struct fou *fou, u32 portid, u32 seq,
+ u32 flags, struct sk_buff *skb, u8 cmd)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ if (fou_fill_info(fou, skb) < 0)
+ goto nla_put_failure;
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct fou_net *fn = net_generic(net, fou_net_id);
+ struct sk_buff *msg;
+ struct fou_cfg cfg;
+ struct fou *fout;
+ __be16 port;
+ int ret;
+
+ ret = parse_nl_config(info, &cfg);
+ if (ret)
+ return ret;
+ port = cfg.udp_config.local_udp_port;
+ if (port == 0)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ ret = -ESRCH;
+ mutex_lock(&fn->fou_lock);
+ list_for_each_entry(fout, &fn->fou_list, list) {
+ if (port == fout->port) {
+ ret = fou_dump_info(fout, info->snd_portid,
+ info->snd_seq, 0, msg,
+ info->genlhdr->cmd);
+ break;
+ }
+ }
+ mutex_unlock(&fn->fou_lock);
+ if (ret < 0)
+ goto out_free;
+
+ return genlmsg_reply(msg, info);
+
+out_free:
+ nlmsg_free(msg);
+ return ret;
+}
+
+static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct fou_net *fn = net_generic(net, fou_net_id);
+ struct fou *fout;
+ int idx = 0, ret;
+
+ mutex_lock(&fn->fou_lock);
+ list_for_each_entry(fout, &fn->fou_list, list) {
+ if (idx++ < cb->args[0])
+ continue;
+ ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ skb, FOU_CMD_GET);
+ if (ret)
+ break;
+ }
+ mutex_unlock(&fn->fou_lock);
+
+ cb->args[0] = idx;
+ return skb->len;
}
static const struct genl_ops fou_nl_ops[] = {
@@ -624,6 +732,12 @@ static const struct genl_ops fou_nl_ops[] = {
.policy = fou_nl_policy,
.flags = GENL_ADMIN_PERM,
},
+ {
+ .cmd = FOU_CMD_GET,
+ .doit = fou_nl_cmd_get_port,
+ .dumpit = fou_nl_dump,
+ .policy = fou_nl_policy,
+ },
};
size_t fou_encap_hlen(struct ip_tunnel_encap *e)
@@ -771,12 +885,12 @@ EXPORT_SYMBOL(gue_build_header);
#ifdef CONFIG_NET_FOU_IP_TUNNELS
-static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops = {
+static const struct ip_tunnel_encap_ops fou_iptun_ops = {
.encap_hlen = fou_encap_hlen,
.build_header = fou_build_header,
};
-static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops = {
+static const struct ip_tunnel_encap_ops gue_iptun_ops = {
.encap_hlen = gue_encap_hlen,
.build_header = gue_build_header,
};
@@ -820,38 +934,63 @@ static void ip_tunnel_encap_del_fou_ops(void)
#endif
+static __net_init int fou_init_net(struct net *net)
+{
+ struct fou_net *fn = net_generic(net, fou_net_id);
+
+ INIT_LIST_HEAD(&fn->fou_list);
+ mutex_init(&fn->fou_lock);
+ return 0;
+}
+
+static __net_exit void fou_exit_net(struct net *net)
+{
+ struct fou_net *fn = net_generic(net, fou_net_id);
+ struct fou *fou, *next;
+
+ /* Close all the FOU sockets */
+ mutex_lock(&fn->fou_lock);
+ list_for_each_entry_safe(fou, next, &fn->fou_list, list)
+ fou_release(fou);
+ mutex_unlock(&fn->fou_lock);
+}
+
+static struct pernet_operations fou_net_ops = {
+ .init = fou_init_net,
+ .exit = fou_exit_net,
+ .id = &fou_net_id,
+ .size = sizeof(struct fou_net),
+};
+
static int __init fou_init(void)
{
int ret;
+ ret = register_pernet_device(&fou_net_ops);
+ if (ret)
+ goto exit;
+
ret = genl_register_family_with_ops(&fou_nl_family,
fou_nl_ops);
-
if (ret < 0)
- goto exit;
+ goto unregister;
ret = ip_tunnel_encap_add_fou_ops();
- if (ret < 0)
- genl_unregister_family(&fou_nl_family);
+ if (ret == 0)
+ return 0;
+ genl_unregister_family(&fou_nl_family);
+unregister:
+ unregister_pernet_device(&fou_net_ops);
exit:
return ret;
}
static void __exit fou_fini(void)
{
- struct fou *fou, *next;
-
ip_tunnel_encap_del_fou_ops();
-
genl_unregister_family(&fou_nl_family);
-
- /* Close all the FOU sockets */
-
- spin_lock(&fou_lock);
- list_for_each_entry_safe(fou, next, &fou_list, list)
- fou_release(fou);
- spin_unlock(&fou_lock);
+ unregister_pernet_device(&fou_net_ops);
}
module_init(fou_init);
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index e64f8e9785d1..8986e63f3bda 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -113,10 +113,6 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
int min_headroom;
int err;
- skb = udp_tunnel_handle_offloads(skb, csum);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
@@ -131,12 +127,16 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
if (unlikely(!skb))
return -ENOMEM;
+ skb = udp_tunnel_handle_offloads(skb, csum);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
- return udp_tunnel_xmit_skb(rt, skb, src, dst,
+ return udp_tunnel_xmit_skb(rt, gs->sock->sk, skb, src, dst,
tos, ttl, df, src_port, dst_port, xnet,
!csum);
}
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 76322c9867d5..bb77ebdae3b3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -111,6 +111,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
const struct nlmsghdr *unlh)
{
const struct inet_sock *inet = inet_sk(sk);
+ const struct tcp_congestion_ops *ca_ops;
const struct inet_diag_handler *handler;
int ext = req->idiag_ext;
struct inet_diag_msg *r;
@@ -208,16 +209,31 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
info = nla_data(attr);
}
- if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops)
- if (nla_put_string(skb, INET_DIAG_CONG,
- icsk->icsk_ca_ops->name) < 0)
+ if (ext & (1 << (INET_DIAG_CONG - 1))) {
+ int err = 0;
+
+ rcu_read_lock();
+ ca_ops = READ_ONCE(icsk->icsk_ca_ops);
+ if (ca_ops)
+ err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
+ rcu_read_unlock();
+ if (err < 0)
goto errout;
+ }
handler->idiag_get_info(sk, r, info);
- if (sk->sk_state < TCP_TIME_WAIT &&
- icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
- icsk->icsk_ca_ops->get_info(sk, ext, skb);
+ if (sk->sk_state < TCP_TIME_WAIT) {
+ int err = 0;
+
+ rcu_read_lock();
+ ca_ops = READ_ONCE(icsk->icsk_ca_ops);
+ if (ca_ops && ca_ops->get_info)
+ err = ca_ops->get_info(sk, ext, skb);
+ rcu_read_unlock();
+ if (err < 0)
+ goto errout;
+ }
out:
nlmsg_end(skb, nlh);
@@ -248,7 +264,7 @@ static int inet_twsk_diag_fill(struct sock *sk,
struct inet_timewait_sock *tw = inet_twsk(sk);
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
- s32 tmo;
+ long tmo;
nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
nlmsg_flags);
@@ -258,7 +274,7 @@ static int inet_twsk_diag_fill(struct sock *sk,
r = nlmsg_data(nlh);
BUG_ON(tw->tw_state != TCP_TIME_WAIT);
- tmo = tw->tw_ttd - inet_tw_time_stamp();
+ tmo = tw->tw_timer.expires - jiffies;
if (tmo < 0)
tmo = 0;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index d4630bf2d9aa..c6fb80bd5826 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -388,7 +388,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
*twp = tw;
} else if (tw) {
/* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw, death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
}
@@ -565,7 +565,7 @@ ok:
spin_unlock(&head->lock);
if (tw) {
- inet_twsk_deschedule(tw, death_row);
+ inet_twsk_deschedule(tw);
while (twrefcnt) {
twrefcnt--;
inet_twsk_put(tw);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 118f0f195820..00ec8d5d7e7e 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -67,9 +67,9 @@ int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
}
/* Must be called with locally disabled BHs. */
-static void __inet_twsk_kill(struct inet_timewait_sock *tw,
- struct inet_hashinfo *hashinfo)
+static void inet_twsk_kill(struct inet_timewait_sock *tw)
{
+ struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
struct inet_bind_hashbucket *bhead;
int refcnt;
/* Unlink from established hashes. */
@@ -89,6 +89,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt));
atomic_sub(refcnt, &tw->tw_refcnt);
+ atomic_dec(&tw->tw_dr->tw_count);
+ inet_twsk_put(tw);
}
void inet_twsk_free(struct inet_timewait_sock *tw)
@@ -168,16 +170,34 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
-struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
+void tw_timer_handler(unsigned long data)
{
- struct inet_timewait_sock *tw =
- kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
- GFP_ATOMIC);
+ struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
+
+ if (tw->tw_kill)
+ NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
+ else
+ NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);
+ inet_twsk_kill(tw);
+}
+
+struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
+ struct inet_timewait_death_row *dr,
+ const int state)
+{
+ struct inet_timewait_sock *tw;
+
+ if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets)
+ return NULL;
+
+ tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
+ GFP_ATOMIC);
if (tw) {
const struct inet_sock *inet = inet_sk(sk);
kmemcheck_annotate_bitfield(tw, flags);
+ tw->tw_dr = dr;
/* Give us an identity. */
tw->tw_daddr = inet->inet_daddr;
tw->tw_rcv_saddr = inet->inet_rcv_saddr;
@@ -196,13 +216,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
tw->tw_prot = sk->sk_prot_creator;
atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, sock_net(sk));
+ setup_timer(&tw->tw_timer, tw_timer_handler, (unsigned long)tw);
/*
* Because we use RCU lookups, we should not set tw_refcnt
* to a non null value before everything is setup for this
* timewait socket.
*/
atomic_set(&tw->tw_refcnt, 0);
- inet_twsk_dead_node_init(tw);
+
__module_get(tw->tw_prot->owner);
}
@@ -210,139 +231,20 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
}
EXPORT_SYMBOL_GPL(inet_twsk_alloc);
-/* Returns non-zero if quota exceeded. */
-static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
- const int slot)
-{
- struct inet_timewait_sock *tw;
- unsigned int killed;
- int ret;
-
- /* NOTE: compare this to previous version where lock
- * was released after detaching chain. It was racy,
- * because tw buckets are scheduled in not serialized context
- * in 2.3 (with netfilter), and with softnet it is common, because
- * soft irqs are not sequenced.
- */
- killed = 0;
- ret = 0;
-rescan:
- inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) {
- __inet_twsk_del_dead_node(tw);
- spin_unlock(&twdr->death_lock);
- __inet_twsk_kill(tw, twdr->hashinfo);
-#ifdef CONFIG_NET_NS
- NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);
-#endif
- inet_twsk_put(tw);
- killed++;
- spin_lock(&twdr->death_lock);
- if (killed > INET_TWDR_TWKILL_QUOTA) {
- ret = 1;
- break;
- }
-
- /* While we dropped twdr->death_lock, another cpu may have
- * killed off the next TW bucket in the list, therefore
- * do a fresh re-read of the hlist head node with the
- * lock reacquired. We still use the hlist traversal
- * macro in order to get the prefetches.
- */
- goto rescan;
- }
-
- twdr->tw_count -= killed;
-#ifndef CONFIG_NET_NS
- NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed);
-#endif
- return ret;
-}
-
-void inet_twdr_hangman(unsigned long data)
-{
- struct inet_timewait_death_row *twdr;
- unsigned int need_timer;
-
- twdr = (struct inet_timewait_death_row *)data;
- spin_lock(&twdr->death_lock);
-
- if (twdr->tw_count == 0)
- goto out;
-
- need_timer = 0;
- if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
- twdr->thread_slots |= (1 << twdr->slot);
- schedule_work(&twdr->twkill_work);
- need_timer = 1;
- } else {
- /* We purged the entire slot, anything left? */
- if (twdr->tw_count)
- need_timer = 1;
- twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
- }
- if (need_timer)
- mod_timer(&twdr->tw_timer, jiffies + twdr->period);
-out:
- spin_unlock(&twdr->death_lock);
-}
-EXPORT_SYMBOL_GPL(inet_twdr_hangman);
-
-void inet_twdr_twkill_work(struct work_struct *work)
-{
- struct inet_timewait_death_row *twdr =
- container_of(work, struct inet_timewait_death_row, twkill_work);
- int i;
-
- BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) >
- (sizeof(twdr->thread_slots) * 8));
-
- while (twdr->thread_slots) {
- spin_lock_bh(&twdr->death_lock);
- for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
- if (!(twdr->thread_slots & (1 << i)))
- continue;
-
- while (inet_twdr_do_twkill_work(twdr, i) != 0) {
- if (need_resched()) {
- spin_unlock_bh(&twdr->death_lock);
- schedule();
- spin_lock_bh(&twdr->death_lock);
- }
- }
-
- twdr->thread_slots &= ~(1 << i);
- }
- spin_unlock_bh(&twdr->death_lock);
- }
-}
-EXPORT_SYMBOL_GPL(inet_twdr_twkill_work);
-
/* These are always called from BH context. See callers in
* tcp_input.c to verify this.
*/
/* This is for handling early-kills of TIME_WAIT sockets. */
-void inet_twsk_deschedule(struct inet_timewait_sock *tw,
- struct inet_timewait_death_row *twdr)
+void inet_twsk_deschedule(struct inet_timewait_sock *tw)
{
- spin_lock(&twdr->death_lock);
- if (inet_twsk_del_dead_node(tw)) {
- inet_twsk_put(tw);
- if (--twdr->tw_count == 0)
- del_timer(&twdr->tw_timer);
- }
- spin_unlock(&twdr->death_lock);
- __inet_twsk_kill(tw, twdr->hashinfo);
+ if (del_timer_sync(&tw->tw_timer))
+ inet_twsk_kill(tw);
}
EXPORT_SYMBOL(inet_twsk_deschedule);
-void inet_twsk_schedule(struct inet_timewait_sock *tw,
- struct inet_timewait_death_row *twdr,
- const int timeo, const int timewait_len)
+void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo)
{
- struct hlist_head *list;
- int slot;
-
/* timeout := RTO * 3.5
*
* 3.5 = 1+2+0.5 to wait for two retransmits.
@@ -367,115 +269,15 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw,
* is greater than TS tick!) and detect old duplicates with help
* of PAWS.
*/
- slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
- spin_lock(&twdr->death_lock);
-
- /* Unlink it, if it was scheduled */
- if (inet_twsk_del_dead_node(tw))
- twdr->tw_count--;
- else
+ tw->tw_kill = timeo <= 4*HZ;
+ if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) {
atomic_inc(&tw->tw_refcnt);
-
- if (slot >= INET_TWDR_RECYCLE_SLOTS) {
- /* Schedule to slow timer */
- if (timeo >= timewait_len) {
- slot = INET_TWDR_TWKILL_SLOTS - 1;
- } else {
- slot = DIV_ROUND_UP(timeo, twdr->period);
- if (slot >= INET_TWDR_TWKILL_SLOTS)
- slot = INET_TWDR_TWKILL_SLOTS - 1;
- }
- tw->tw_ttd = inet_tw_time_stamp() + timeo;
- slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
- list = &twdr->cells[slot];
- } else {
- tw->tw_ttd = inet_tw_time_stamp() + (slot << INET_TWDR_RECYCLE_TICK);
-
- if (twdr->twcal_hand < 0) {
- twdr->twcal_hand = 0;
- twdr->twcal_jiffie = jiffies;
- twdr->twcal_timer.expires = twdr->twcal_jiffie +
- (slot << INET_TWDR_RECYCLE_TICK);
- add_timer(&twdr->twcal_timer);
- } else {
- if (time_after(twdr->twcal_timer.expires,
- jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
- mod_timer(&twdr->twcal_timer,
- jiffies + (slot << INET_TWDR_RECYCLE_TICK));
- slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
- }
- list = &twdr->twcal_row[slot];
+ atomic_inc(&tw->tw_dr->tw_count);
}
-
- hlist_add_head(&tw->tw_death_node, list);
-
- if (twdr->tw_count++ == 0)
- mod_timer(&twdr->tw_timer, jiffies + twdr->period);
- spin_unlock(&twdr->death_lock);
}
EXPORT_SYMBOL_GPL(inet_twsk_schedule);
-void inet_twdr_twcal_tick(unsigned long data)
-{
- struct inet_timewait_death_row *twdr;
- int n, slot;
- unsigned long j;
- unsigned long now = jiffies;
- int killed = 0;
- int adv = 0;
-
- twdr = (struct inet_timewait_death_row *)data;
-
- spin_lock(&twdr->death_lock);
- if (twdr->twcal_hand < 0)
- goto out;
-
- slot = twdr->twcal_hand;
- j = twdr->twcal_jiffie;
-
- for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
- if (time_before_eq(j, now)) {
- struct hlist_node *safe;
- struct inet_timewait_sock *tw;
-
- inet_twsk_for_each_inmate_safe(tw, safe,
- &twdr->twcal_row[slot]) {
- __inet_twsk_del_dead_node(tw);
- __inet_twsk_kill(tw, twdr->hashinfo);
-#ifdef CONFIG_NET_NS
- NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
-#endif
- inet_twsk_put(tw);
- killed++;
- }
- } else {
- if (!adv) {
- adv = 1;
- twdr->twcal_jiffie = j;
- twdr->twcal_hand = slot;
- }
-
- if (!hlist_empty(&twdr->twcal_row[slot])) {
- mod_timer(&twdr->twcal_timer, j);
- goto out;
- }
- }
- j += 1 << INET_TWDR_RECYCLE_TICK;
- slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
- }
- twdr->twcal_hand = -1;
-
-out:
- if ((twdr->tw_count -= killed) == 0)
- del_timer(&twdr->tw_timer);
-#ifndef CONFIG_NET_NS
- NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed);
-#endif
- spin_unlock(&twdr->death_lock);
-}
-EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
-
void inet_twsk_purge(struct inet_hashinfo *hashinfo,
struct inet_timewait_death_row *twdr, int family)
{
@@ -509,7 +311,7 @@ restart:
rcu_read_unlock();
local_bh_disable();
- inet_twsk_deschedule(tw, twdr);
+ inet_twsk_deschedule(tw);
local_bh_enable();
inet_twsk_put(tw);
goto restart_rcu;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index d9bc28ac5d1b..3674484946a5 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -57,7 +57,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
}
-static int ip_forward_finish(struct sk_buff *skb)
+static int ip_forward_finish(struct sock *sk, struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
@@ -68,7 +68,7 @@ static int ip_forward_finish(struct sk_buff *skb)
ip_forward_options(skb);
skb_sender_cpu_clear(skb);
- return dst_output(skb);
+ return dst_output_sk(sk, skb);
}
int ip_forward(struct sk_buff *skb)
@@ -82,6 +82,9 @@ int ip_forward(struct sk_buff *skb)
if (skb->pkt_type != PACKET_HOST)
goto drop;
+ if (unlikely(skb->sk))
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
@@ -136,8 +139,8 @@ int ip_forward(struct sk_buff *skb)
skb->priority = rt_tos2priority(iph->tos);
- return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev,
- rt->dst.dev, ip_forward_finish);
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb,
+ skb->dev, rt->dst.dev, ip_forward_finish);
sr_failed:
/*
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 2e0410ed8f16..2db4c8773c1b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -187,7 +187,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
return false;
}
-static int ip_local_deliver_finish(struct sk_buff *skb)
+static int ip_local_deliver_finish(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
@@ -253,7 +253,8 @@ int ip_local_deliver(struct sk_buff *skb)
return 0;
}
- return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, NULL, skb,
+ skb->dev, NULL,
ip_local_deliver_finish);
}
@@ -309,7 +310,7 @@ drop:
int sysctl_ip_early_demux __read_mostly = 1;
EXPORT_SYMBOL(sysctl_ip_early_demux);
-static int ip_rcv_finish(struct sk_buff *skb)
+static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
@@ -451,7 +452,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
- return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL,
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb,
+ dev, NULL,
ip_rcv_finish);
csum_error:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 26f6f7956168..c65b93a7b711 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -91,14 +91,19 @@ void ip_send_check(struct iphdr *iph)
}
EXPORT_SYMBOL(ip_send_check);
-int __ip_local_out(struct sk_buff *skb)
+int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
iph->tot_len = htons(skb->len);
ip_send_check(iph);
- return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
- skb_dst(skb)->dev, dst_output);
+ return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, NULL,
+ skb_dst(skb)->dev, dst_output_sk);
+}
+
+int __ip_local_out(struct sk_buff *skb)
+{
+ return __ip_local_out_sk(skb->sk, skb);
}
int ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
@@ -163,7 +168,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
}
EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
-static inline int ip_finish_output2(struct sk_buff *skb)
+static inline int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = (struct rtable *)dst;
@@ -211,7 +216,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
return -EINVAL;
}
-static int ip_finish_output_gso(struct sk_buff *skb)
+static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb)
{
netdev_features_t features;
struct sk_buff *segs;
@@ -220,7 +225,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
/* common case: locally created skb or seglen is <= mtu */
if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb))
- return ip_finish_output2(skb);
+ return ip_finish_output2(sk, skb);
/* Slowpath - GSO segment length is exceeding the dst MTU.
*
@@ -243,7 +248,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
int err;
segs->next = NULL;
- err = ip_fragment(segs, ip_finish_output2);
+ err = ip_fragment(sk, segs, ip_finish_output2);
if (err && ret == 0)
ret = err;
@@ -253,22 +258,22 @@ static int ip_finish_output_gso(struct sk_buff *skb)
return ret;
}
-static int ip_finish_output(struct sk_buff *skb)
+static int ip_finish_output(struct sock *sk, struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
if (skb_dst(skb)->xfrm) {
IPCB(skb)->flags |= IPSKB_REROUTED;
- return dst_output(skb);
+ return dst_output_sk(sk, skb);
}
#endif
if (skb_is_gso(skb))
- return ip_finish_output_gso(skb);
+ return ip_finish_output_gso(sk, skb);
if (skb->len > ip_skb_dst_mtu(skb))
- return ip_fragment(skb, ip_finish_output2);
+ return ip_fragment(sk, skb, ip_finish_output2);
- return ip_finish_output2(skb);
+ return ip_finish_output2(sk, skb);
}
int ip_mc_output(struct sock *sk, struct sk_buff *skb)
@@ -307,7 +312,7 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb)
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- newskb, NULL, newskb->dev,
+ sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
}
@@ -322,11 +327,11 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb)
if (rt->rt_flags&RTCF_BROADCAST) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
if (newskb)
- NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
+ NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, newskb,
NULL, newskb->dev, dev_loopback_xmit);
}
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
+ return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL,
skb->dev, ip_finish_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
@@ -340,7 +345,8 @@ int ip_output(struct sock *sk, struct sk_buff *skb)
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev,
+ return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb,
+ NULL, dev,
ip_finish_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
@@ -449,7 +455,6 @@ no_route:
}
EXPORT_SYMBOL(ip_queue_xmit);
-
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
to->pkt_type = from->pkt_type;
@@ -480,7 +485,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
* single device frame, and queue such a frame for sending.
*/
-int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+int ip_fragment(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *))
{
struct iphdr *iph;
int ptr;
@@ -593,7 +599,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
ip_send_check(iph);
}
- err = output(skb);
+ err = output(sk, skb);
if (!err)
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
@@ -730,7 +736,7 @@ slow_path:
ip_send_check(iph);
- err = output(skb2);
+ err = output(sk, skb2);
if (err)
goto fail;
@@ -813,7 +819,6 @@ static inline int ip_ufo_append_data(struct sock *sk,
skb->csum = 0;
-
__skb_queue_tail(queue, skb);
} else if (skb_is_gso(skb)) {
goto append;
@@ -1211,7 +1216,6 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
}
-
while (size > 0) {
int i;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 6d364ab8e14e..4c2c3ba4ba65 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -782,7 +782,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
return;
}
- err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
+ err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol,
tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 8c4dcc46acd2..ce63ab21b6cd 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -74,7 +74,8 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
iph->daddr = dst;
iph->saddr = src;
iph->ttl = ttl;
- __ip_select_ident(sock_net(sk), iph, skb_shinfo(skb)->gso_segs ?: 1);
+ __ip_select_ident(dev_net(rt->dst.dev), iph,
+ skb_shinfo(skb)->gso_segs ?: 1);
err = ip_local_out_sk(sk, skb);
if (unlikely(net_xmit_eval(err)))
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5f17d0e78071..3a2c0162c3ba 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1679,7 +1679,7 @@ static void ip_encap(struct net *net, struct sk_buff *skb,
nf_reset(skb);
}
-static inline int ipmr_forward_finish(struct sk_buff *skb)
+static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
@@ -1689,7 +1689,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
if (unlikely(opt->optlen))
ip_forward_options(skb);
- return dst_output(skb);
+ return dst_output_sk(sk, skb);
}
/*
@@ -1788,7 +1788,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
* not mrouter) cannot join to more than one interface - it will
* result in receiving multiple packets.
*/
- NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
+ NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb,
+ skb->dev, dev,
ipmr_forward_finish);
return;
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index c5b794da51a9..3262e41ff76f 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -13,6 +13,7 @@
#include <net/dst.h>
#include <net/netfilter/ipv4/nf_reject.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_bridge.h>
#include <net/netfilter/ipv4/nf_reject.h>
const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
@@ -146,7 +147,8 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
*/
if (oldskb->nf_bridge) {
struct ethhdr *oeth = eth_hdr(oldskb);
- nskb->dev = oldskb->nf_bridge->physindev;
+
+ nskb->dev = nf_bridge_get_physindev(oldskb);
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index 665de06561cd..40e414c4ca56 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -17,20 +17,17 @@
#include <net/netfilter/ipv4/nf_nat_masquerade.h>
static void nft_masq_ipv4_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_masq *priv = nft_expr_priv(expr);
struct nf_nat_range range;
- unsigned int verdict;
memset(&range, 0, sizeof(range));
range.flags = priv->flags;
- verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum,
- &range, pkt->out);
-
- data[NFT_REG_VERDICT].verdict = verdict;
+ regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum,
+ &range, pkt->out);
}
static struct nft_expr_type nft_masq_ipv4_type;
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
index 6ecfce63201a..d8d795df9c13 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -18,26 +18,25 @@
#include <net/netfilter/nft_redir.h>
static void nft_redir_ipv4_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_redir *priv = nft_expr_priv(expr);
struct nf_nat_ipv4_multi_range_compat mr;
- unsigned int verdict;
memset(&mr, 0, sizeof(mr));
if (priv->sreg_proto_min) {
mr.range[0].min.all =
- *(__be16 *)&data[priv->sreg_proto_min].data[0];
+ *(__be16 *)&regs->data[priv->sreg_proto_min];
mr.range[0].max.all =
- *(__be16 *)&data[priv->sreg_proto_max].data[0];
+ *(__be16 *)&regs->data[priv->sreg_proto_max];
mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
mr.range[0].flags |= priv->flags;
- verdict = nf_nat_redirect_ipv4(pkt->skb, &mr, pkt->ops->hooknum);
- data[NFT_REG_VERDICT].verdict = verdict;
+ regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr,
+ pkt->ops->hooknum);
}
static struct nft_expr_type nft_redir_ipv4_type;
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index 16a5d4d73d75..b07e58b51158 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -20,7 +20,7 @@
#include <net/netfilter/nft_reject.h>
static void nft_reject_ipv4_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_reject *priv = nft_expr_priv(expr);
@@ -33,9 +33,11 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr,
case NFT_REJECT_TCP_RST:
nf_send_reset(pkt->skb, pkt->ops->hooknum);
break;
+ default:
+ break;
}
- data[NFT_REG_VERDICT].verdict = NF_DROP;
+ regs->verdict.code = NF_DROP;
}
static struct nft_expr_type nft_reject_ipv4_type;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d8953ef0770c..e1f3b911dd1e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -63,7 +63,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
- tcp_death_row.tw_count, sockets,
+ atomic_read(&tcp_death_row.tw_count), sockets,
proto_memory_allocated(&tcp_prot));
seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index f2fc92a1241a..561cd4b8fc6e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -411,8 +411,8 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
icmp_out_count(net, ((struct icmphdr *)
skb_transport_header(skb))->type);
- err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
- rt->dst.dev, dst_output);
+ err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb,
+ NULL, rt->dst.dev, dst_output_sk);
if (err > 0)
err = net_xmit_errno(err);
if (err)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 18e3a12eb1b2..8c5cd9efebbc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -520,8 +520,10 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
/* Race breaker. If space is freed after
* wspace test but before the flags are set,
- * IO signal will be lost.
+ * IO signal will be lost. Memory barrier
+ * pairs with the input side.
*/
+ smp_mb__after_atomic();
if (sk_stream_is_writeable(sk))
mask |= POLLOUT | POLLWRNORM;
}
@@ -2595,6 +2597,7 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
+ u32 rate;
memset(info, 0, sizeof(*info));
@@ -2655,10 +2658,11 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
info->tcpi_total_retrans = tp->total_retrans;
- info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ?
- sk->sk_pacing_rate : ~0ULL;
- info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ?
- sk->sk_max_pacing_rate : ~0ULL;
+ rate = READ_ONCE(sk->sk_pacing_rate);
+ info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL;
+
+ rate = READ_ONCE(sk->sk_max_pacing_rate);
+ info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
}
EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index b504371af742..4376016f7fa5 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -277,7 +277,7 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
}
}
-static void dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
{
const struct dctcp *ca = inet_csk_ca(sk);
@@ -297,8 +297,9 @@ static void dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
info.dctcp_ab_tot = ca->acked_bytes_total;
}
- nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info);
+ return nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info);
}
+ return 0;
}
static struct tcp_congestion_ops dctcp __read_mostly = {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 5da55e2b5cd2..e3d87aca6be8 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -303,6 +303,7 @@ fastopen:
} else if (foc->len > 0) /* Client presents an invalid cookie */
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+ valid_foc.exp = foc->exp;
*foc = valid_foc;
return false;
}
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 1d5a30a90adf..67476f085e48 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -300,8 +300,7 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
}
/* Extract info for Tcp socket info provided via netlink. */
-static void tcp_illinois_info(struct sock *sk, u32 ext,
- struct sk_buff *skb)
+static int tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb)
{
const struct illinois *ca = inet_csk_ca(sk);
@@ -318,8 +317,9 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
do_div(t, info.tcpv_rttcnt);
info.tcpv_rtt = t;
}
- nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+ return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
}
+ return 0;
}
static struct tcp_congestion_ops tcp_illinois __read_mostly = {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c1ce304ba8d2..3a4d9b34bed4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3099,17 +3099,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (sacked & TCPCB_SACKED_RETRANS)
tp->retrans_out -= acked_pcount;
flag |= FLAG_RETRANS_DATA_ACKED;
- } else {
+ } else if (!(sacked & TCPCB_SACKED_ACKED)) {
last_ackt = skb->skb_mstamp;
WARN_ON_ONCE(last_ackt.v64 == 0);
if (!first_ackt.v64)
first_ackt = last_ackt;
- if (!(sacked & TCPCB_SACKED_ACKED)) {
- reord = min(pkts_acked, reord);
- if (!after(scb->end_seq, tp->high_seq))
- flag |= FLAG_ORIG_SACK_ACKED;
- }
+ reord = min(pkts_acked, reord);
+ if (!after(scb->end_seq, tp->high_seq))
+ flag |= FLAG_ORIG_SACK_ACKED;
}
if (sacked & TCPCB_SACKED_ACKED)
@@ -3603,6 +3601,23 @@ old_ack:
return 0;
}
+static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
+ bool syn, struct tcp_fastopen_cookie *foc,
+ bool exp_opt)
+{
+ /* Valid only in SYN or SYN-ACK with an even length. */
+ if (!foc || !syn || len < 0 || (len & 1))
+ return;
+
+ if (len >= TCP_FASTOPEN_COOKIE_MIN &&
+ len <= TCP_FASTOPEN_COOKIE_MAX)
+ memcpy(foc->val, cookie, len);
+ else if (len != 0)
+ len = -1;
+ foc->len = len;
+ foc->exp = exp_opt;
+}
+
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
* the fast version below fails.
@@ -3692,21 +3707,22 @@ void tcp_parse_options(const struct sk_buff *skb,
*/
break;
#endif
+ case TCPOPT_FASTOPEN:
+ tcp_parse_fastopen_option(
+ opsize - TCPOLEN_FASTOPEN_BASE,
+ ptr, th->syn, foc, false);
+ break;
+
case TCPOPT_EXP:
/* Fast Open option shares code 254 using a
- * 16 bits magic number. It's valid only in
- * SYN or SYN-ACK with an even size.
+ * 16 bits magic number.
*/
- if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
- get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
- !foc || !th->syn || (opsize & 1))
- break;
- foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
- if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
- foc->len <= TCP_FASTOPEN_COOKIE_MAX)
- memcpy(foc->val, ptr + 2, foc->len);
- else if (foc->len != 0)
- foc->len = -1;
+ if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
+ get_unaligned_be16(ptr) ==
+ TCPOPT_FASTOPEN_MAGIC)
+ tcp_parse_fastopen_option(opsize -
+ TCPOLEN_EXP_FASTOPEN_BASE,
+ ptr + 2, th->syn, foc, true);
break;
}
@@ -4829,6 +4845,8 @@ static void tcp_check_space(struct sock *sk)
{
if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
+ /* pairs with tcp_poll() */
+ smp_mb__after_atomic();
if (sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
tcp_new_space(sk);
@@ -5360,8 +5378,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
- u16 mss = tp->rx_opt.mss_clamp;
- bool syn_drop;
+ u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
+ bool syn_drop = false;
if (mss == tp->rx_opt.user_mss) {
struct tcp_options_received opt;
@@ -5373,16 +5391,25 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
mss = opt.mss_clamp;
}
- if (!tp->syn_fastopen) /* Ignore an unsolicited cookie */
+ if (!tp->syn_fastopen) {
+ /* Ignore an unsolicited cookie */
cookie->len = -1;
+ } else if (tp->total_retrans) {
+ /* SYN timed out and the SYN-ACK neither has a cookie nor
+ * acknowledges data. Presumably the remote received only
+ * the retransmitted (regular) SYNs: either the original
+ * SYN-data or the corresponding SYN-ACK was dropped.
+ */
+ syn_drop = (cookie->len < 0 && data);
+ } else if (cookie->len < 0 && !tp->syn_data) {
+ /* We requested a cookie but didn't get it. If we did not use
+ * the (old) exp opt format then try so next time (try_exp=1).
+ * Otherwise we go back to use the RFC7413 opt (try_exp=2).
+ */
+ try_exp = tp->syn_fastopen_exp ? 2 : 1;
+ }
- /* The SYN-ACK neither has cookie nor acknowledges the data. Presumably
- * the remote receives only the retransmitted (regular) SYNs: either
- * the original SYN-data or the corresponding SYN-ACK is lost.
- */
- syn_drop = (cookie->len <= 0 && data && tp->total_retrans);
-
- tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);
+ tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
if (data) { /* Retransmit unacked data in SYN */
tcp_for_write_queue_from(data, sk) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 560f9571f7c4..3571f2be4470 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -897,9 +897,9 @@ EXPORT_SYMBOL(tcp_md5_do_lookup);
struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
const struct sock *addr_sk)
{
- union tcp_md5_addr *addr;
+ const union tcp_md5_addr *addr;
- addr = (union tcp_md5_addr *)&sk->sk_daddr;
+ addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);
@@ -1685,7 +1685,7 @@ do_time_wait:
iph->daddr, th->dest,
inet_iif(skb));
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
goto process;
@@ -2242,9 +2242,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
struct seq_file *f, int i)
{
+ long delta = tw->tw_timer.expires - jiffies;
__be32 dest, src;
__u16 destp, srcp;
- s32 delta = tw->tw_ttd - inet_tw_time_stamp();
dest = tw->tw_daddr;
src = tw->tw_rcv_saddr;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 78ecc4a01712..a51d63a43e33 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -28,7 +28,8 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s
struct tcp_fastopen_metrics {
u16 mss;
- u16 syn_loss:10; /* Recurring Fast Open SYN losses */
+ u16 syn_loss:10, /* Recurring Fast Open SYN losses */
+ try_exp:2; /* Request w/ exp. option (once) */
unsigned long last_syn_loss; /* Last Fast Open SYN loss */
struct tcp_fastopen_cookie cookie;
};
@@ -131,6 +132,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
if (fastopen_clear) {
tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0;
+ tm->tcpm_fastopen.try_exp = 0;
+ tm->tcpm_fastopen.cookie.exp = false;
tm->tcpm_fastopen.cookie.len = 0;
}
}
@@ -713,6 +716,8 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
if (tfom->mss)
*mss = tfom->mss;
*cookie = tfom->cookie;
+ if (cookie->len <= 0 && tfom->try_exp == 1)
+ cookie->exp = true;
*syn_loss = tfom->syn_loss;
*last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0;
} while (read_seqretry(&fastopen_seqlock, seq));
@@ -721,7 +726,8 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
}
void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
- struct tcp_fastopen_cookie *cookie, bool syn_lost)
+ struct tcp_fastopen_cookie *cookie, bool syn_lost,
+ u16 try_exp)
{
struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_metrics_block *tm;
@@ -738,6 +744,9 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
tfom->mss = mss;
if (cookie && cookie->len > 0)
tfom->cookie = *cookie;
+ else if (try_exp > tfom->try_exp &&
+ tfom->cookie.len <= 0 && !tfom->cookie.exp)
+ tfom->try_exp = try_exp;
if (syn_lost) {
++tfom->syn_loss;
tfom->last_syn_loss = jiffies;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d7003911c894..63d6311b5365 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -34,18 +34,7 @@ int sysctl_tcp_abort_on_overflow __read_mostly;
struct inet_timewait_death_row tcp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
- .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
- .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock),
.hashinfo = &tcp_hashinfo,
- .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
- (unsigned long)&tcp_death_row),
- .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work,
- inet_twdr_twkill_work),
-/* Short-time timewait calendar */
-
- .twcal_hand = -1,
- .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
- (unsigned long)&tcp_death_row),
};
EXPORT_SYMBOL_GPL(tcp_death_row);
@@ -158,7 +147,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
if (!th->fin ||
TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
kill_with_rst:
- inet_twsk_deschedule(tw, &tcp_death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
return TCP_TW_RST;
}
@@ -174,11 +163,9 @@ kill_with_rst:
if (tcp_death_row.sysctl_tw_recycle &&
tcptw->tw_ts_recent_stamp &&
tcp_tw_remember_stamp(tw))
- inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, tw->tw_timeout);
else
- inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
return TCP_TW_ACK;
}
@@ -211,13 +198,12 @@ kill_with_rst:
*/
if (sysctl_tcp_rfc1337 == 0) {
kill:
- inet_twsk_deschedule(tw, &tcp_death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
return TCP_TW_SUCCESS;
}
}
- inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
if (tmp_opt.saw_tstamp) {
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
@@ -267,8 +253,7 @@ kill:
* Do not reschedule in the last case.
*/
if (paws_reject || th->ack)
- inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
return tcp_timewait_check_oow_rate_limit(
tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT);
@@ -283,16 +268,15 @@ EXPORT_SYMBOL(tcp_timewait_state_process);
*/
void tcp_time_wait(struct sock *sk, int state, int timeo)
{
- struct inet_timewait_sock *tw = NULL;
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_timewait_sock *tw;
bool recycle_ok = false;
if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
recycle_ok = tcp_remember_stamp(sk);
- if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
- tw = inet_twsk_alloc(sk, state);
+ tw = inet_twsk_alloc(sk, &tcp_death_row, state);
if (tw) {
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
@@ -355,8 +339,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
timeo = TCP_TIMEWAIT_LEN;
}
- inet_twsk_schedule(tw, &tcp_death_row, timeo,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, timeo);
inet_twsk_put(tw);
} else {
/* Sorry, if we're out of memory, just CLOSE this
@@ -628,10 +611,16 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
LINUX_MIB_TCPACKSKIPPEDSYNRECV,
&tcp_rsk(req)->last_oow_ack_time) &&
- !inet_rtx_syn_ack(sk, req))
- mod_timer_pending(&req->rsk_timer, jiffies +
- min(TCP_TIMEOUT_INIT << req->num_timeout,
- TCP_RTO_MAX));
+ !inet_rtx_syn_ack(sk, req)) {
+ unsigned long expires = jiffies;
+
+ expires += min(TCP_TIMEOUT_INIT << req->num_timeout,
+ TCP_RTO_MAX);
+ if (!fastopen)
+ mod_timer_pending(&req->rsk_timer, expires);
+ else
+ req->rsk_timer.expires = expires;
+ }
return NULL;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7404e5238e00..8c8d7e06b72f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -518,17 +518,26 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
+ u8 *p = (u8 *)ptr;
+ u32 len; /* Fast Open option length */
+
+ if (foc->exp) {
+ len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
+ *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
+ TCPOPT_FASTOPEN_MAGIC);
+ p += TCPOLEN_EXP_FASTOPEN_BASE;
+ } else {
+ len = TCPOLEN_FASTOPEN_BASE + foc->len;
+ *p++ = TCPOPT_FASTOPEN;
+ *p++ = len;
+ }
- *ptr++ = htonl((TCPOPT_EXP << 24) |
- ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
- TCPOPT_FASTOPEN_MAGIC);
-
- memcpy(ptr, foc->val, foc->len);
- if ((foc->len & 3) == 2) {
- u8 *align = ((u8 *)ptr) + foc->len;
- align[0] = align[1] = TCPOPT_NOP;
+ memcpy(p, foc->val, foc->len);
+ if ((len & 3) == 2) {
+ p[foc->len] = TCPOPT_NOP;
+ p[foc->len + 1] = TCPOPT_NOP;
}
- ptr += (foc->len + 3) >> 2;
+ ptr += (len + 3) >> 2;
}
}
@@ -583,13 +592,17 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
}
if (fastopen && fastopen->cookie.len >= 0) {
- u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
+ u32 need = fastopen->cookie.len;
+
+ need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
+ TCPOLEN_FASTOPEN_BASE;
need = (need + 3) & ~3U; /* Align to 32 bits */
if (remaining >= need) {
opts->options |= OPTION_FAST_OPEN_COOKIE;
opts->fastopen_cookie = &fastopen->cookie;
remaining -= need;
tp->syn_fastopen = 1;
+ tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
}
}
@@ -641,8 +654,11 @@ static unsigned int tcp_synack_options(struct sock *sk,
if (unlikely(!ireq->tstamp_ok))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
}
- if (foc && foc->len >= 0) {
- u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
+ if (foc != NULL && foc->len >= 0) {
+ u32 need = foc->len;
+
+ need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
+ TCPOLEN_FASTOPEN_BASE;
need = (need + 3) & ~3U; /* Align to 32 bits */
if (remaining >= need) {
opts->options |= OPTION_FAST_OPEN_COOKIE;
@@ -2978,6 +2994,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
rcu_read_unlock();
#endif
+ /* Do not fool tcpdump (if any), clean our debris */
+ skb->tstamp.tv64 = 0;
return skb;
}
EXPORT_SYMBOL(tcp_make_synack);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 2568fd282873..8c65dc147d8b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -167,7 +167,7 @@ static int tcp_write_timeout(struct sock *sk)
if (icsk->icsk_retransmits) {
dst_negative_advice(sk);
if (tp->syn_fastopen || tp->syn_data)
- tcp_fastopen_cache_set(sk, 0, NULL, true);
+ tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
if (tp->syn_data)
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVEFAIL);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a6afde666ab1..c71a1b8f7bde 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -286,7 +286,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
/* Extract info for Tcp socket info provided via netlink. */
-void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
{
const struct vegas *ca = inet_csk_ca(sk);
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
@@ -297,8 +297,9 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
.tcpv_minrtt = ca->minRTT,
};
- nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+ return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
}
+ return 0;
}
EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index 0531b99d8637..e8a6b33cc61d 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -19,6 +19,6 @@ void tcp_vegas_init(struct sock *sk);
void tcp_vegas_state(struct sock *sk, u8 ca_state);
void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us);
void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
-void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
+int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
#endif /* __TCP_VEGAS_H */
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index bb63fba47d47..b3c57cceb990 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -256,8 +256,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
}
/* Extract info for Tcp socket info provided via netlink. */
-static void tcp_westwood_info(struct sock *sk, u32 ext,
- struct sk_buff *skb)
+static int tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb)
{
const struct westwood *ca = inet_csk_ca(sk);
@@ -268,8 +267,9 @@ static void tcp_westwood_info(struct sock *sk, u32 ext,
.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
};
- nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+ return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
}
+ return 0;
}
static struct tcp_congestion_ops tcp_westwood __read_mostly = {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2162fc6ce1c1..d10b7e0112eb 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -433,7 +433,6 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
udp_ehash_secret + net_hash_mix(net));
}
-
/* called with read_rcu_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
@@ -1171,7 +1170,6 @@ out:
return ret;
}
-
/**
* first_packet_length - return length of first packet in receive queue
* @sk: socket
@@ -1355,7 +1353,6 @@ csum_copy_err:
goto try_again;
}
-
int udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
@@ -1579,7 +1576,6 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
udp_lib_checksum_complete(skb))
goto csum_error;
-
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
is_udplite);
@@ -1609,7 +1605,6 @@ drop:
return -1;
}
-
static void flush_stack(struct sock **stack, unsigned int count,
struct sk_buff *skb, unsigned int final)
{
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index c83b35485056..6bb98cc193c9 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -75,7 +75,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
}
EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
-int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb,
+int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl,
__be16 df, __be16 src_port, __be16 dst_port,
bool xnet, bool nocheck)
@@ -92,7 +92,7 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb,
udp_set_csum(nocheck, skb, src, dst, skb->len);
- return iptunnel_xmit(skb->sk, rt, skb, src, dst, IPPROTO_UDP,
+ return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP,
tos, ttl, df, xnet);
}
EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index cac7468db0a1..60b032f58ccc 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -22,7 +22,7 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
return xfrm4_extract_header(skb);
}
-static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
+static inline int xfrm4_rcv_encap_finish(struct sock *sk, struct sk_buff *skb)
{
if (!skb_dst(skb)) {
const struct iphdr *iph = ip_hdr(skb);
@@ -52,7 +52,8 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
iph->tot_len = htons(skb->len);
ip_send_check(iph);
- NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+ NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb,
+ skb->dev, NULL,
xfrm4_rcv_encap_finish);
return 0;
}
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index dab73813cb92..2878dbfffeb7 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -69,7 +69,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
}
EXPORT_SYMBOL(xfrm4_prepare_output);
-int xfrm4_output_finish(struct sk_buff *skb)
+int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
{
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
@@ -77,26 +77,26 @@ int xfrm4_output_finish(struct sk_buff *skb)
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
#endif
- return xfrm_output(skb);
+ return xfrm_output(sk, skb);
}
-static int __xfrm4_output(struct sk_buff *skb)
+static int __xfrm4_output(struct sock *sk, struct sk_buff *skb)
{
struct xfrm_state *x = skb_dst(skb)->xfrm;
#ifdef CONFIG_NETFILTER
if (!x) {
IPCB(skb)->flags |= IPSKB_REROUTED;
- return dst_output(skb);
+ return dst_output_sk(sk, skb);
}
#endif
- return x->outer_mode->afinfo->output_finish(skb);
+ return x->outer_mode->afinfo->output_finish(sk, skb);
}
int xfrm4_output(struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb,
+ return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb,
NULL, skb_dst(skb)->dev, __xfrm4_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 033f17816ef4..871641bc1ed4 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -246,7 +246,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
*twp = tw;
} else if (tw) {
/* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw, death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f724329d7436..b5e6cc1d4a73 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -760,7 +760,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb_set_inner_protocol(skb, protocol);
- ip6tunnel_xmit(skb, dev);
+ ip6tunnel_xmit(NULL, skb, dev);
if (ndst)
ip6_tnl_dst_store(tunnel, ndst);
return 0;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index fb97f7f8d4ed..f2e464eba5ef 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -46,8 +46,7 @@
#include <net/xfrm.h>
#include <net/inet_ecn.h>
-
-int ip6_rcv_finish(struct sk_buff *skb)
+int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb)
{
if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
const struct inet6_protocol *ipprot;
@@ -183,7 +182,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, dev, NULL,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb,
+ dev, NULL,
ip6_rcv_finish);
err:
IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
@@ -198,7 +198,7 @@ drop:
*/
-static int ip6_input_finish(struct sk_buff *skb)
+static int ip6_input_finish(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
const struct inet6_protocol *ipprot;
@@ -277,7 +277,8 @@ discard:
int ip6_input(struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, NULL, skb,
+ skb->dev, NULL,
ip6_input_finish);
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 654f245aa930..7fde1f265c90 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,7 +56,7 @@
#include <net/checksum.h>
#include <linux/mroute6.h>
-static int ip6_finish_output2(struct sk_buff *skb)
+static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
@@ -70,7 +70,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
- if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
+ if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
((mroute6_socket(dev_net(dev), skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
@@ -82,7 +82,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
*/
if (newskb)
NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- newskb, NULL, newskb->dev,
+ sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
if (ipv6_hdr(skb)->hop_limit == 0) {
@@ -122,14 +122,14 @@ static int ip6_finish_output2(struct sk_buff *skb)
return -EINVAL;
}
-static int ip6_finish_output(struct sk_buff *skb)
+static int ip6_finish_output(struct sock *sk, struct sk_buff *skb)
{
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
- return ip6_fragment(skb, ip6_finish_output2);
+ return ip6_fragment(sk, skb, ip6_finish_output2);
else
- return ip6_finish_output2(skb);
+ return ip6_finish_output2(sk, skb);
}
int ip6_output(struct sock *sk, struct sk_buff *skb)
@@ -143,7 +143,8 @@ int ip6_output(struct sock *sk, struct sk_buff *skb)
return 0;
}
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
+ NULL, dev,
ip6_finish_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
@@ -223,8 +224,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUT, skb->len);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
- dst->dev, dst_output);
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
+ NULL, dst->dev, dst_output_sk);
}
skb->dev = dst->dev;
@@ -316,10 +317,10 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
return 0;
}
-static inline int ip6_forward_finish(struct sk_buff *skb)
+static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb)
{
skb_sender_cpu_clear(skb);
- return dst_output(skb);
+ return dst_output_sk(sk, skb);
}
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
@@ -511,7 +512,8 @@ int ip6_forward(struct sk_buff *skb)
IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
+ skb->dev, dst->dev,
ip6_forward_finish);
error:
@@ -538,7 +540,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
-int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+int ip6_fragment(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *))
{
struct sk_buff *frag;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -667,7 +670,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
ip6_copy_metadata(frag, skb);
}
- err = output(skb);
+ err = output(sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES);
@@ -800,7 +803,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- err = output(frag);
+ err = output(sk, frag);
if (err)
goto fail;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b6a211a150b2..5cafd92c2312 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1100,7 +1100,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
ipv6h->daddr = fl6->daddr;
- ip6tunnel_xmit(skb, dev);
+ ip6tunnel_xmit(NULL, skb, dev);
if (ndst)
ip6_tnl_dst_store(t, ndst);
return 0;
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 32d9b268e7d8..bba8903e871f 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -62,7 +62,8 @@ error:
}
EXPORT_SYMBOL_GPL(udp_sock_create6);
-int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb,
+int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr,
__u8 prio, __u8 ttl, __be16 src_port,
@@ -97,7 +98,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb,
ip6h->daddr = *daddr;
ip6h->saddr = *saddr;
- ip6tunnel_xmit(skb, dev);
+ ip6tunnel_xmit(sk, skb, dev);
return 0;
}
EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index b53148444e15..ed9d681207fa 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -288,8 +288,7 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p,
static void vti6_dev_uninit(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct net *net = dev_net(dev);
- struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ struct vti6_net *ip6n = net_generic(t->net, vti6_net_id);
if (dev == ip6n->fb_tnl_dev)
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 8493a22e74eb..74ceb73c1c9a 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1986,13 +1986,13 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
}
#endif
-static inline int ip6mr_forward2_finish(struct sk_buff *skb)
+static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb)
{
IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTFORWDATAGRAMS);
IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTOCTETS, skb->len);
- return dst_output(skb);
+ return dst_output_sk(sk, skb);
}
/*
@@ -2064,7 +2064,8 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
IP6CB(skb)->flags |= IP6SKB_FORWARDED;
- return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
+ skb->dev, dev,
ip6mr_forward2_finish);
out_free:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fac1f27e428e..083b2927fc67 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1644,8 +1644,9 @@ static void mld_sendpack(struct sk_buff *skb)
payload_len = skb->len;
- err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
- dst_output);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ net->ipv6.igmp_sk, skb, NULL, skb->dev,
+ dst_output_sk);
out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
@@ -2007,8 +2008,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
}
skb_dst_set(skb, dst);
- err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
- dst_output);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
+ NULL, skb->dev, dst_output_sk);
out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, type);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 71fde6cafb35..96f153c0846b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -463,8 +463,9 @@ static void ndisc_send_skb(struct sk_buff *skb,
idev = __in6_dev_get(dst->dev);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
- err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
- dst_output);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
+ NULL, dst->dev,
+ dst_output_sk);
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, type);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index e2b882056751..a45db0b4785c 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -75,7 +75,7 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
nf_ct_frag6_consume_orig(reasm);
- NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm,
+ NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->sk, reasm,
state->in, state->out,
state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 3afdce03d94e..94b4c6dfb400 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -13,6 +13,7 @@
#include <net/ip6_checksum.h>
#include <net/netfilter/ipv6/nf_reject.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
#include <net/netfilter/ipv6/nf_reject.h>
const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
@@ -195,7 +196,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
*/
if (oldskb->nf_bridge) {
struct ethhdr *oeth = eth_hdr(oldskb);
- nskb->dev = oldskb->nf_bridge->physindev;
+
+ nskb->dev = nf_bridge_get_physindev(oldskb);
nskb->protocol = htons(ETH_P_IPV6);
ip6h->payload_len = htons(sizeof(struct tcphdr));
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index 529c119cbb14..cd1ac1637a05 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -18,19 +18,16 @@
#include <net/netfilter/ipv6/nf_nat_masquerade.h>
static void nft_masq_ipv6_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_masq *priv = nft_expr_priv(expr);
struct nf_nat_range range;
- unsigned int verdict;
memset(&range, 0, sizeof(range));
range.flags = priv->flags;
- verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out);
-
- data[NFT_REG_VERDICT].verdict = verdict;
+ regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out);
}
static struct nft_expr_type nft_masq_ipv6_type;
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
index 11820b6b3613..effd393bd517 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -18,26 +18,25 @@
#include <net/netfilter/nf_nat_redirect.h>
static void nft_redir_ipv6_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_redir *priv = nft_expr_priv(expr);
struct nf_nat_range range;
- unsigned int verdict;
memset(&range, 0, sizeof(range));
if (priv->sreg_proto_min) {
range.min_proto.all =
- *(__be16 *)&data[priv->sreg_proto_min].data[0];
+ *(__be16 *)&regs->data[priv->sreg_proto_min],
range.max_proto.all =
- *(__be16 *)&data[priv->sreg_proto_max].data[0];
+ *(__be16 *)&regs->data[priv->sreg_proto_max],
range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
range.flags |= priv->flags;
- verdict = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->ops->hooknum);
- data[NFT_REG_VERDICT].verdict = verdict;
+ regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range,
+ pkt->ops->hooknum);
}
static struct nft_expr_type nft_redir_ipv6_type;
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index f73285924144..d0d1540ecf87 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -20,7 +20,7 @@
#include <net/netfilter/ipv6/nf_reject.h>
static void nft_reject_ipv6_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_reject *priv = nft_expr_priv(expr);
@@ -34,9 +34,11 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr,
case NFT_REJECT_TCP_RST:
nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
break;
+ default:
+ break;
}
- data[NFT_REG_VERDICT].verdict = NF_DROP;
+ regs->verdict.code = NF_DROP;
}
static struct nft_expr_type nft_reject_ipv6_type;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 4016a6ef9d61..85892af57364 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -136,7 +136,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
EXPORT_SYMBOL(ip6_dst_hoplimit);
#endif
-int __ip6_local_out(struct sk_buff *skb)
+static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb)
{
int len;
@@ -146,19 +146,30 @@ int __ip6_local_out(struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(len);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
- return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
- skb_dst(skb)->dev, dst_output);
+ return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
+ NULL, skb_dst(skb)->dev, dst_output_sk);
+}
+
+int __ip6_local_out(struct sk_buff *skb)
+{
+ return __ip6_local_out_sk(skb->sk, skb);
}
EXPORT_SYMBOL_GPL(__ip6_local_out);
-int ip6_local_out(struct sk_buff *skb)
+int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb)
{
int err;
- err = __ip6_local_out(skb);
+ err = __ip6_local_out_sk(sk, skb);
if (likely(err == 1))
- err = dst_output(skb);
+ err = dst_output_sk(sk, skb);
return err;
}
+EXPORT_SYMBOL_GPL(ip6_local_out_sk);
+
+int ip6_local_out(struct sk_buff *skb)
+{
+ return ip6_local_out_sk(skb->sk, skb);
+}
EXPORT_SYMBOL_GPL(ip6_local_out);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 79ccdb4c1b33..8072bd4139b7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -652,8 +652,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
goto error_fault;
IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
- err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
- rt->dst.dev, dst_output);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
+ NULL, rt->dst.dev, dst_output_sk);
if (err > 0)
err = net_xmit_errno(err);
if (err)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 6cf2026a9cea..ac35a28599be 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -983,7 +983,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
skb_set_inner_ipproto(skb, IPPROTO_IPV6);
- err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
+ err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr,
protocol, tos, ttl, df,
!net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f73a97f6e68e..ad51df85aa00 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1486,7 +1486,7 @@ do_time_wait:
ntohs(th->dest), tcp_v6_iif(skb));
if (sk2) {
struct inet_timewait_sock *tw = inet_twsk(sk);
- inet_twsk_deschedule(tw, &tcp_death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
sk = sk2;
tcp_v6_restore_cb(skb);
@@ -1728,9 +1728,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
static void get_timewait6_sock(struct seq_file *seq,
struct inet_timewait_sock *tw, int i)
{
+ long delta = tw->tw_timer.expires - jiffies;
const struct in6_addr *dest, *src;
__u16 destp, srcp;
- s32 delta = tw->tw_ttd - inet_tw_time_stamp();
dest = &tw->tw_v6_daddr;
src = &tw->tw_v6_rcv_saddr;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 120aff9aa010..3477c919fcc8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -120,7 +120,6 @@ static u32 udp6_portaddr_hash(const struct net *net,
return hash ^ port;
}
-
int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
unsigned int hash2_nulladdr =
@@ -385,7 +384,6 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup);
-
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
@@ -1555,7 +1553,6 @@ static struct inet_protosw udpv6_protosw = {
.flags = INET_PROTOSW_PERMANENT,
};
-
int __init udpv6_init(void)
{
int ret;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index f48fbe4d16f5..74bd17882a2f 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -42,7 +42,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
ipv6_hdr(skb)->payload_len = htons(skb->len);
__skb_push(skb, skb->data - skb_network_header(skb));
- NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+ NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb,
+ skb->dev, NULL,
ip6_rcv_finish);
return -1;
}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 010f8bd2d577..09c76a7b474d 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -120,7 +120,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
}
EXPORT_SYMBOL(xfrm6_prepare_output);
-int xfrm6_output_finish(struct sk_buff *skb)
+int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
{
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
@@ -128,10 +128,10 @@ int xfrm6_output_finish(struct sk_buff *skb)
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
#endif
- return xfrm_output(skb);
+ return xfrm_output(sk, skb);
}
-static int __xfrm6_output(struct sk_buff *skb)
+static int __xfrm6_output(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
@@ -140,7 +140,7 @@ static int __xfrm6_output(struct sk_buff *skb)
#ifdef CONFIG_NETFILTER
if (!x) {
IP6CB(skb)->flags |= IP6SKB_REROUTED;
- return dst_output(skb);
+ return dst_output_sk(sk, skb);
}
#endif
@@ -160,14 +160,15 @@ static int __xfrm6_output(struct sk_buff *skb)
if (x->props.mode == XFRM_MODE_TUNNEL &&
((skb->len > mtu && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)))) {
- return ip6_fragment(skb, x->outer_mode->afinfo->output_finish);
+ return ip6_fragment(sk, skb,
+ x->outer_mode->afinfo->output_finish);
}
- return x->outer_mode->afinfo->output_finish(skb);
+ return x->outer_mode->afinfo->output_finish(sk, skb);
}
int xfrm6_output(struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb,
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
NULL, skb_dst(skb)->dev, __xfrm6_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 20522492d8cc..cce9d425c718 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -188,6 +188,43 @@ ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid)
__release(agg_queue);
}
+static void
+ieee80211_agg_stop_txq(struct sta_info *sta, int tid)
+{
+ struct ieee80211_txq *txq = sta->sta.txq[tid];
+ struct txq_info *txqi;
+
+ if (!txq)
+ return;
+
+ txqi = to_txq_info(txq);
+
+ /* Lock here to protect against further seqno updates on dequeue */
+ spin_lock_bh(&txqi->queue.lock);
+ set_bit(IEEE80211_TXQ_STOP, &txqi->flags);
+ spin_unlock_bh(&txqi->queue.lock);
+}
+
+static void
+ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable)
+{
+ struct ieee80211_txq *txq = sta->sta.txq[tid];
+ struct txq_info *txqi;
+
+ if (!txq)
+ return;
+
+ txqi = to_txq_info(txq);
+
+ if (enable)
+ set_bit(IEEE80211_TXQ_AMPDU, &txqi->flags);
+ else
+ clear_bit(IEEE80211_TXQ_AMPDU, &txqi->flags);
+
+ clear_bit(IEEE80211_TXQ_STOP, &txqi->flags);
+ drv_wake_tx_queue(sta->sdata->local, txqi);
+}
+
/*
* splice packets from the STA's pending to the local pending,
* requires a call to ieee80211_agg_splice_finish later
@@ -247,6 +284,7 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid)
ieee80211_assign_tid_tx(sta, tid, NULL);
ieee80211_agg_splice_finish(sta->sdata, tid);
+ ieee80211_agg_start_txq(sta, tid, false);
kfree_rcu(tid_tx, rcu_head);
}
@@ -418,6 +456,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
+ ieee80211_agg_stop_txq(sta, tid);
+
/*
* Make sure no packets are being processed. This ensures that
* we have a valid starting sequence number and that in-flight
@@ -440,6 +480,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
ieee80211_agg_splice_finish(sdata, tid);
spin_unlock_bh(&sta->lock);
+ ieee80211_agg_start_txq(sta, tid, false);
+
kfree_rcu(tid_tx, rcu_head);
return;
}
@@ -669,6 +711,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
ieee80211_agg_splice_finish(sta->sdata, tid);
spin_unlock_bh(&sta->lock);
+
+ ieee80211_agg_start_txq(sta, tid, true);
}
void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 0a39d3db951a..26e1ca8a474a 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1367,4 +1367,16 @@ drv_tdls_recv_channel_switch(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+static inline void drv_wake_tx_queue(struct ieee80211_local *local,
+ struct txq_info *txq)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif);
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ trace_drv_wake_tx_queue(local, sdata, txq);
+ local->ops->wake_tx_queue(&local->hw, &txq->txq);
+}
+
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 487f5e2a9283..ab46ab4a7249 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -26,6 +26,7 @@
#include <linux/etherdevice.h>
#include <linux/leds.h>
#include <linux/idr.h>
+#include <linux/rhashtable.h>
#include <net/ieee80211_radiotap.h>
#include <net/cfg80211.h>
#include <net/mac80211.h>
@@ -810,6 +811,19 @@ struct mac80211_qos_map {
struct rcu_head rcu_head;
};
+enum txq_info_flags {
+ IEEE80211_TXQ_STOP,
+ IEEE80211_TXQ_AMPDU,
+};
+
+struct txq_info {
+ struct sk_buff_head queue;
+ unsigned long flags;
+
+ /* keep last! */
+ struct ieee80211_txq txq;
+};
+
struct ieee80211_sub_if_data {
struct list_head list;
@@ -852,6 +866,7 @@ struct ieee80211_sub_if_data {
bool control_port_no_encrypt;
int encrypt_headroom;
+ atomic_t txqs_len[IEEE80211_NUM_ACS];
struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
struct mac80211_qos_map __rcu *qos_map;
@@ -1187,7 +1202,7 @@ struct ieee80211_local {
spinlock_t tim_lock;
unsigned long num_sta;
struct list_head sta_list;
- struct sta_info __rcu *sta_hash[STA_HASH_SIZE];
+ struct rhashtable sta_hash;
struct timer_list sta_cleanup;
int sta_generation;
@@ -1449,6 +1464,10 @@ static inline struct ieee80211_local *hw_to_local(
return container_of(hw, struct ieee80211_local, hw);
}
+static inline struct txq_info *to_txq_info(struct ieee80211_txq *txq)
+{
+ return container_of(txq, struct txq_info, txq);
+}
static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
{
@@ -1905,6 +1924,9 @@ static inline bool ieee80211_can_run_worker(struct ieee80211_local *local)
return true;
}
+void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct txq_info *txq, int tid);
void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
u16 transaction, u16 auth_alg, u16 status,
const u8 *extra, size_t extra_len, const u8 *bssid,
@@ -1943,10 +1965,6 @@ int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata,
void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata);
void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata);
-size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
- const u8 *ids, int n_ids,
- const u8 *after_ric, int n_after_ric,
- size_t offset);
size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset);
u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
u16 cap);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index a0cd97fd0c49..b4ac596a7cb7 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -969,6 +969,13 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
}
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+ if (sdata->vif.txq) {
+ struct txq_info *txqi = to_txq_info(sdata->vif.txq);
+
+ ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
+ atomic_set(&sdata->txqs_len[txqi->txq.ac], 0);
+ }
+
if (local->open_count == 0)
ieee80211_clear_tx_pending(local);
@@ -1654,6 +1661,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
{
struct net_device *ndev = NULL;
struct ieee80211_sub_if_data *sdata = NULL;
+ struct txq_info *txqi;
int ret, i;
int txqs = 1;
@@ -1673,10 +1681,18 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ieee80211_assign_perm_addr(local, wdev->address, type);
memcpy(sdata->vif.addr, wdev->address, ETH_ALEN);
} else {
+ int size = ALIGN(sizeof(*sdata) + local->hw.vif_data_size,
+ sizeof(void *));
+ int txq_size = 0;
+
+ if (local->ops->wake_tx_queue)
+ txq_size += sizeof(struct txq_info) +
+ local->hw.txq_data_size;
+
if (local->hw.queues >= IEEE80211_NUM_ACS)
txqs = IEEE80211_NUM_ACS;
- ndev = alloc_netdev_mqs(sizeof(*sdata) + local->hw.vif_data_size,
+ ndev = alloc_netdev_mqs(size + txq_size,
name, name_assign_type,
ieee80211_if_setup, txqs, 1);
if (!ndev)
@@ -1711,6 +1727,11 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN);
memcpy(sdata->name, ndev->name, IFNAMSIZ);
+ if (txq_size) {
+ txqi = netdev_priv(ndev) + size;
+ ieee80211_init_tx_queue(sdata, NULL, txqi, 0);
+ }
+
sdata->dev = ndev;
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4977967c8b00..df3051d96aff 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -557,6 +557,9 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
local = wiphy_priv(wiphy);
+ if (sta_info_init(local))
+ goto err_free;
+
local->hw.wiphy = wiphy;
local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN);
@@ -629,8 +632,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
spin_lock_init(&local->ack_status_lock);
idr_init(&local->ack_status_frames);
- sta_info_init(local);
-
for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
skb_queue_head_init(&local->pending[i]);
atomic_set(&local->agg_queue_stop[i], 0);
@@ -650,6 +651,9 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
ieee80211_roc_setup(local);
return &local->hw;
+ err_free:
+ wiphy_free(wiphy);
+ return NULL;
}
EXPORT_SYMBOL(ieee80211_alloc_hw_nm);
@@ -1035,6 +1039,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
local->dynamic_ps_forced_timeout = -1;
+ if (!local->hw.txq_ac_max_pending)
+ local->hw.txq_ac_max_pending = 64;
+
result = ieee80211_wep_init(local);
if (result < 0)
wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
@@ -1173,7 +1180,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
destroy_workqueue(local->workqueue);
wiphy_unregister(local->hw.wiphy);
- sta_info_stop(local);
ieee80211_wep_free(local);
ieee80211_led_exit(local);
kfree(local->int_scan_req);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 00103f36dcbf..26053bf2faa8 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1348,15 +1348,15 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
*/
if (has_80211h_pwr &&
(!has_cisco_pwr || pwr_level_80211h <= pwr_level_cisco)) {
- sdata_info(sdata,
- "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n",
- pwr_level_80211h, chan_pwr, pwr_reduction_80211h,
- sdata->u.mgd.bssid);
+ sdata_dbg(sdata,
+ "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n",
+ pwr_level_80211h, chan_pwr, pwr_reduction_80211h,
+ sdata->u.mgd.bssid);
new_ap_level = pwr_level_80211h;
} else { /* has_cisco_pwr is always true here. */
- sdata_info(sdata,
- "Limiting TX power to %d dBm as advertised by %pM\n",
- pwr_level_cisco, sdata->u.mgd.bssid);
+ sdata_dbg(sdata,
+ "Limiting TX power to %d dBm as advertised by %pM\n",
+ pwr_level_cisco, sdata->u.mgd.bssid);
new_ap_level = pwr_level_cisco;
}
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index ef6e8a6c4253..247552a7f6c2 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -69,14 +69,39 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix)
return i;
}
+/* return current EMWA throughput */
+int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma)
+{
+ int usecs;
+
+ usecs = mr->perfect_tx_time;
+ if (!usecs)
+ usecs = 1000000;
+
+ /* reset thr. below 10% success */
+ if (mr->stats.prob_ewma < MINSTREL_FRAC(10, 100))
+ return 0;
+
+ if (prob_ewma > MINSTREL_FRAC(90, 100))
+ return MINSTREL_TRUNC(100000 * (MINSTREL_FRAC(90, 100) / usecs));
+ else
+ return MINSTREL_TRUNC(100000 * (prob_ewma / usecs));
+}
+
/* find & sort topmost throughput rates */
static inline void
minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list)
{
int j = MAX_THR_RATES;
+ struct minstrel_rate_stats *tmp_mrs = &mi->r[j - 1].stats;
+ struct minstrel_rate_stats *cur_mrs = &mi->r[i].stats;
- while (j > 0 && mi->r[i].stats.cur_tp > mi->r[tp_list[j - 1]].stats.cur_tp)
+ while (j > 0 && (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) >
+ minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))) {
j--;
+ tmp_mrs = &mi->r[tp_list[j - 1]].stats;
+ }
+
if (j < MAX_THR_RATES - 1)
memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1));
if (j < MAX_THR_RATES)
@@ -127,13 +152,47 @@ minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
rate_control_set_rates(mp->hw, mi->sta, ratetbl);
}
+/*
+* Recalculate statistics and counters of a given rate
+*/
+void
+minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs)
+{
+ if (unlikely(mrs->attempts > 0)) {
+ mrs->sample_skipped = 0;
+ mrs->cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts);
+ if (unlikely(!mrs->att_hist)) {
+ mrs->prob_ewma = mrs->cur_prob;
+ } else {
+ /* update exponential weighted moving variance */
+ mrs->prob_ewmsd = minstrel_ewmsd(mrs->prob_ewmsd,
+ mrs->cur_prob,
+ mrs->prob_ewma,
+ EWMA_LEVEL);
+
+ /*update exponential weighted moving avarage */
+ mrs->prob_ewma = minstrel_ewma(mrs->prob_ewma,
+ mrs->cur_prob,
+ EWMA_LEVEL);
+ }
+ mrs->att_hist += mrs->attempts;
+ mrs->succ_hist += mrs->success;
+ } else {
+ mrs->sample_skipped++;
+ }
+
+ mrs->last_success = mrs->success;
+ mrs->last_attempts = mrs->attempts;
+ mrs->success = 0;
+ mrs->attempts = 0;
+}
+
static void
minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
{
u8 tmp_tp_rate[MAX_THR_RATES];
u8 tmp_prob_rate = 0;
- u32 usecs;
- int i;
+ int i, tmp_cur_tp, tmp_prob_tp;
for (i = 0; i < MAX_THR_RATES; i++)
tmp_tp_rate[i] = 0;
@@ -141,38 +200,15 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
for (i = 0; i < mi->n_rates; i++) {
struct minstrel_rate *mr = &mi->r[i];
struct minstrel_rate_stats *mrs = &mi->r[i].stats;
+ struct minstrel_rate_stats *tmp_mrs = &mi->r[tmp_prob_rate].stats;
- usecs = mr->perfect_tx_time;
- if (!usecs)
- usecs = 1000000;
-
- if (unlikely(mrs->attempts > 0)) {
- mrs->sample_skipped = 0;
- mrs->cur_prob = MINSTREL_FRAC(mrs->success,
- mrs->attempts);
- mrs->succ_hist += mrs->success;
- mrs->att_hist += mrs->attempts;
- mrs->probability = minstrel_ewma(mrs->probability,
- mrs->cur_prob,
- EWMA_LEVEL);
- } else
- mrs->sample_skipped++;
-
- mrs->last_success = mrs->success;
- mrs->last_attempts = mrs->attempts;
- mrs->success = 0;
- mrs->attempts = 0;
-
- /* Update throughput per rate, reset thr. below 10% success */
- if (mrs->probability < MINSTREL_FRAC(10, 100))
- mrs->cur_tp = 0;
- else
- mrs->cur_tp = mrs->probability * (1000000 / usecs);
+ /* Update statistics of success probability per rate */
+ minstrel_calc_rate_stats(mrs);
/* Sample less often below the 10% chance of success.
* Sample less often above the 95% chance of success. */
- if (mrs->probability > MINSTREL_FRAC(95, 100) ||
- mrs->probability < MINSTREL_FRAC(10, 100)) {
+ if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) ||
+ mrs->prob_ewma < MINSTREL_FRAC(10, 100)) {
mr->adjusted_retry_count = mrs->retry_count >> 1;
if (mr->adjusted_retry_count > 2)
mr->adjusted_retry_count = 2;
@@ -192,11 +228,14 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
* choose the maximum throughput rate as max_prob_rate
* (2) if all success probabilities < 95%, the rate with
* highest success probability is chosen as max_prob_rate */
- if (mrs->probability >= MINSTREL_FRAC(95, 100)) {
- if (mrs->cur_tp >= mi->r[tmp_prob_rate].stats.cur_tp)
+ if (mrs->prob_ewma >= MINSTREL_FRAC(95, 100)) {
+ tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_ewma);
+ tmp_prob_tp = minstrel_get_tp_avg(&mi->r[tmp_prob_rate],
+ tmp_mrs->prob_ewma);
+ if (tmp_cur_tp >= tmp_prob_tp)
tmp_prob_rate = i;
} else {
- if (mrs->probability >= mi->r[tmp_prob_rate].stats.probability)
+ if (mrs->prob_ewma >= tmp_mrs->prob_ewma)
tmp_prob_rate = i;
}
}
@@ -215,7 +254,7 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
#endif
/* Reset update timer */
- mi->stats_update = jiffies;
+ mi->last_stats_update = jiffies;
minstrel_update_rates(mp, mi);
}
@@ -253,7 +292,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
if (mi->sample_deferred > 0)
mi->sample_deferred--;
- if (time_after(jiffies, mi->stats_update +
+ if (time_after(jiffies, mi->last_stats_update +
(mp->update_interval * HZ) / 1000))
minstrel_update_stats(mp, mi);
}
@@ -385,7 +424,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
* has a probability of >95%, we shouldn't be attempting
* to use it, as this only wastes precious airtime */
if (!mrr_capable &&
- (mi->r[ndx].stats.probability > MINSTREL_FRAC(95, 100)))
+ (mi->r[ndx].stats.prob_ewma > MINSTREL_FRAC(95, 100)))
return;
mi->prev_sample = true;
@@ -519,7 +558,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
}
mi->n_rates = n;
- mi->stats_update = jiffies;
+ mi->last_stats_update = jiffies;
init_sample_table(mi);
minstrel_update_rates(mp, mi);
@@ -553,7 +592,7 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
if (!mi->sample_table)
goto error1;
- mi->stats_update = jiffies;
+ mi->last_stats_update = jiffies;
return mi;
error1:
@@ -663,12 +702,18 @@ minstrel_free(void *priv)
static u32 minstrel_get_expected_throughput(void *priv_sta)
{
struct minstrel_sta_info *mi = priv_sta;
+ struct minstrel_rate_stats *tmp_mrs;
int idx = mi->max_tp_rate[0];
+ int tmp_cur_tp;
/* convert pkt per sec in kbps (1200 is the average pkt size used for
* computing cur_tp
*/
- return MINSTREL_TRUNC(mi->r[idx].stats.cur_tp) * 1200 * 8 / 1024;
+ tmp_mrs = &mi->r[idx].stats;
+ tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma);
+ tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;
+
+ return tmp_cur_tp;
}
const struct rate_control_ops mac80211_minstrel = {
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 410efe620c57..c230bbe93262 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -13,7 +13,6 @@
#define EWMA_DIV 128
#define SAMPLE_COLUMNS 10 /* number of columns in sample table */
-
/* scaled fraction values */
#define MINSTREL_SCALE 16
#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div)
@@ -24,11 +23,34 @@
/*
* Perform EWMA (Exponentially Weighted Moving Average) calculation
- */
+ */
static inline int
minstrel_ewma(int old, int new, int weight)
{
- return (new * (EWMA_DIV - weight) + old * weight) / EWMA_DIV;
+ int diff, incr;
+
+ diff = new - old;
+ incr = (EWMA_DIV - weight) * diff / EWMA_DIV;
+
+ return old + incr;
+}
+
+/*
+ * Perform EWMSD (Exponentially Weighted Moving Standard Deviation) calculation
+ */
+static inline int
+minstrel_ewmsd(int old_ewmsd, int cur_prob, int prob_ewma, int weight)
+{
+ int diff, incr, tmp_var;
+
+ /* calculate exponential weighted moving variance */
+ diff = MINSTREL_TRUNC((cur_prob - prob_ewma) * 1000000);
+ incr = (EWMA_DIV - weight) * diff / EWMA_DIV;
+ tmp_var = old_ewmsd * old_ewmsd;
+ tmp_var = weight * (tmp_var + diff * incr / 1000000) / EWMA_DIV;
+
+ /* return standard deviation */
+ return (u16) int_sqrt(tmp_var);
}
struct minstrel_rate_stats {
@@ -39,11 +61,13 @@ struct minstrel_rate_stats {
/* total attempts/success counters */
u64 att_hist, succ_hist;
- /* current throughput */
- unsigned int cur_tp;
-
- /* packet delivery probabilities */
- unsigned int cur_prob, probability;
+ /* statistis of packet delivery probability
+ * cur_prob - current prob within last update intervall
+ * prob_ewma - exponential weighted moving average of prob
+ * prob_ewmsd - exp. weighted moving standard deviation of prob */
+ unsigned int cur_prob;
+ unsigned int prob_ewma;
+ u16 prob_ewmsd;
/* maximum retry counts */
u8 retry_count;
@@ -71,7 +95,7 @@ struct minstrel_rate {
struct minstrel_sta_info {
struct ieee80211_sta *sta;
- unsigned long stats_update;
+ unsigned long last_stats_update;
unsigned int sp_ack_dur;
unsigned int rate_avg;
@@ -95,6 +119,7 @@ struct minstrel_sta_info {
#ifdef CONFIG_MAC80211_DEBUGFS
struct dentry *dbg_stats;
+ struct dentry *dbg_stats_csv;
#endif
};
@@ -121,7 +146,6 @@ struct minstrel_priv {
u32 fixed_rate_idx;
struct dentry *dbg_fixed_rate;
#endif
-
};
struct minstrel_debugfs_info {
@@ -133,8 +157,13 @@ extern const struct rate_control_ops mac80211_minstrel;
void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
void minstrel_remove_sta_debugfs(void *priv, void *priv_sta);
+/* Recalculate success probabilities and counters for a given rate using EWMA */
+void minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs);
+int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma);
+
/* debugfs */
int minstrel_stats_open(struct inode *inode, struct file *file);
+int minstrel_stats_csv_open(struct inode *inode, struct file *file);
ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos);
int minstrel_stats_release(struct inode *inode, struct file *file);
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 2acab1bcaa4b..1db5f7c3318a 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -54,12 +54,28 @@
#include <net/mac80211.h>
#include "rc80211_minstrel.h"
+ssize_t
+minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos)
+{
+ struct minstrel_debugfs_info *ms;
+
+ ms = file->private_data;
+ return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len);
+}
+
+int
+minstrel_stats_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
int
minstrel_stats_open(struct inode *inode, struct file *file)
{
struct minstrel_sta_info *mi = inode->i_private;
struct minstrel_debugfs_info *ms;
- unsigned int i, tp, prob, eprob;
+ unsigned int i, tp_max, tp_avg, prob, eprob;
char *p;
ms = kmalloc(2048, GFP_KERNEL);
@@ -68,8 +84,14 @@ minstrel_stats_open(struct inode *inode, struct file *file)
file->private_data = ms;
p = ms->buf;
- p += sprintf(p, "rate tpt eprob *prob"
- " *ok(*cum) ok( cum)\n");
+ p += sprintf(p, "\n");
+ p += sprintf(p, "best __________rate_________ ______"
+ "statistics______ ________last_______ "
+ "______sum-of________\n");
+ p += sprintf(p, "rate [name idx airtime max_tp] [ ø(tp) ø(prob) "
+ "sd(prob)] [prob.|retry|suc|att] "
+ "[#success | #attempts]\n");
+
for (i = 0; i < mi->n_rates; i++) {
struct minstrel_rate *mr = &mi->r[i];
struct minstrel_rate_stats *mrs = &mi->r[i].stats;
@@ -79,18 +101,26 @@ minstrel_stats_open(struct inode *inode, struct file *file)
*(p++) = (i == mi->max_tp_rate[2]) ? 'C' : ' ';
*(p++) = (i == mi->max_tp_rate[3]) ? 'D' : ' ';
*(p++) = (i == mi->max_prob_rate) ? 'P' : ' ';
- p += sprintf(p, "%3u%s", mr->bitrate / 2,
+
+ p += sprintf(p, " %3u%s ", mr->bitrate / 2,
(mr->bitrate & 1 ? ".5" : " "));
+ p += sprintf(p, "%3u ", i);
+ p += sprintf(p, "%6u ", mr->perfect_tx_time);
- tp = MINSTREL_TRUNC(mrs->cur_tp / 10);
+ tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
+ tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma);
prob = MINSTREL_TRUNC(mrs->cur_prob * 1000);
- eprob = MINSTREL_TRUNC(mrs->probability * 1000);
+ eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
- p += sprintf(p, " %4u.%1u %3u.%1u %3u.%1u"
- " %4u(%4u) %9llu(%9llu)\n",
- tp / 10, tp % 10,
+ p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u"
+ " %3u.%1u %3u %3u %-3u "
+ "%9llu %-9llu\n",
+ tp_max / 10, tp_max % 10,
+ tp_avg / 10, tp_avg % 10,
eprob / 10, eprob % 10,
+ mrs->prob_ewmsd / 10, mrs->prob_ewmsd % 10,
prob / 10, prob % 10,
+ mrs->retry_count,
mrs->last_success,
mrs->last_attempts,
(unsigned long long)mrs->succ_hist,
@@ -107,25 +137,75 @@ minstrel_stats_open(struct inode *inode, struct file *file)
return 0;
}
-ssize_t
-minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos)
+static const struct file_operations minstrel_stat_fops = {
+ .owner = THIS_MODULE,
+ .open = minstrel_stats_open,
+ .read = minstrel_stats_read,
+ .release = minstrel_stats_release,
+ .llseek = default_llseek,
+};
+
+int
+minstrel_stats_csv_open(struct inode *inode, struct file *file)
{
+ struct minstrel_sta_info *mi = inode->i_private;
struct minstrel_debugfs_info *ms;
+ unsigned int i, tp_max, tp_avg, prob, eprob;
+ char *p;
- ms = file->private_data;
- return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len);
-}
+ ms = kmalloc(2048, GFP_KERNEL);
+ if (!ms)
+ return -ENOMEM;
+
+ file->private_data = ms;
+ p = ms->buf;
+
+ for (i = 0; i < mi->n_rates; i++) {
+ struct minstrel_rate *mr = &mi->r[i];
+ struct minstrel_rate_stats *mrs = &mi->r[i].stats;
+
+ p += sprintf(p, "%s" ,((i == mi->max_tp_rate[0]) ? "A" : ""));
+ p += sprintf(p, "%s" ,((i == mi->max_tp_rate[1]) ? "B" : ""));
+ p += sprintf(p, "%s" ,((i == mi->max_tp_rate[2]) ? "C" : ""));
+ p += sprintf(p, "%s" ,((i == mi->max_tp_rate[3]) ? "D" : ""));
+ p += sprintf(p, "%s" ,((i == mi->max_prob_rate) ? "P" : ""));
+
+ p += sprintf(p, ",%u%s", mr->bitrate / 2,
+ (mr->bitrate & 1 ? ".5," : ","));
+ p += sprintf(p, "%u,", i);
+ p += sprintf(p, "%u,",mr->perfect_tx_time);
+
+ tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
+ tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma);
+ prob = MINSTREL_TRUNC(mrs->cur_prob * 1000);
+ eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+
+ p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u.%u,%u.%u,%u,%u,%u,"
+ "%llu,%llu,%d,%d\n",
+ tp_max / 10, tp_max % 10,
+ tp_avg / 10, tp_avg % 10,
+ eprob / 10, eprob % 10,
+ mrs->prob_ewmsd / 10, mrs->prob_ewmsd % 10,
+ prob / 10, prob % 10,
+ mrs->retry_count,
+ mrs->last_success,
+ mrs->last_attempts,
+ (unsigned long long)mrs->succ_hist,
+ (unsigned long long)mrs->att_hist,
+ mi->total_packets - mi->sample_packets,
+ mi->sample_packets);
+
+ }
+ ms->len = p - ms->buf;
+
+ WARN_ON(ms->len + sizeof(*ms) > 2048);
-int
-minstrel_stats_release(struct inode *inode, struct file *file)
-{
- kfree(file->private_data);
return 0;
}
-static const struct file_operations minstrel_stat_fops = {
+static const struct file_operations minstrel_stat_csv_fops = {
.owner = THIS_MODULE,
- .open = minstrel_stats_open,
+ .open = minstrel_stats_csv_open,
.read = minstrel_stats_read,
.release = minstrel_stats_release,
.llseek = default_llseek,
@@ -138,6 +218,9 @@ minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
mi->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, mi,
&minstrel_stat_fops);
+
+ mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO, dir,
+ mi, &minstrel_stat_csv_fops);
}
void
@@ -146,4 +229,6 @@ minstrel_remove_sta_debugfs(void *priv, void *priv_sta)
struct minstrel_sta_info *mi = priv_sta;
debugfs_remove(mi->dbg_stats);
+
+ debugfs_remove(mi->dbg_stats_csv);
}
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 60698fc7042e..7430a1df2ab1 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -313,67 +313,35 @@ minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index)
return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES];
}
-
/*
- * Recalculate success probabilities and counters for a rate using EWMA
+ * Return current throughput based on the average A-MPDU length, taking into
+ * account the expected number of retransmissions and their expected length
*/
-static void
-minstrel_calc_rate_ewma(struct minstrel_rate_stats *mr)
+int
+minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
+ int prob_ewma)
{
- if (unlikely(mr->attempts > 0)) {
- mr->sample_skipped = 0;
- mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts);
- if (!mr->att_hist)
- mr->probability = mr->cur_prob;
- else
- mr->probability = minstrel_ewma(mr->probability,
- mr->cur_prob, EWMA_LEVEL);
- mr->att_hist += mr->attempts;
- mr->succ_hist += mr->success;
- } else {
- mr->sample_skipped++;
- }
- mr->last_success = mr->success;
- mr->last_attempts = mr->attempts;
- mr->success = 0;
- mr->attempts = 0;
-}
-
-/*
- * Calculate throughput based on the average A-MPDU length, taking into account
- * the expected number of retransmissions and their expected length
- */
-static void
-minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate)
-{
- struct minstrel_rate_stats *mr;
unsigned int nsecs = 0;
- unsigned int tp;
- unsigned int prob;
- mr = &mi->groups[group].rates[rate];
- prob = mr->probability;
-
- if (prob < MINSTREL_FRAC(1, 10)) {
- mr->cur_tp = 0;
- return;
- }
-
- /*
- * For the throughput calculation, limit the probability value to 90% to
- * account for collision related packet error rate fluctuation
- */
- if (prob > MINSTREL_FRAC(9, 10))
- prob = MINSTREL_FRAC(9, 10);
+ /* do not account throughput if sucess prob is below 10% */
+ if (prob_ewma < MINSTREL_FRAC(10, 100))
+ return 0;
if (group != MINSTREL_CCK_GROUP)
nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
nsecs += minstrel_mcs_groups[group].duration[rate];
- /* prob is scaled - see MINSTREL_FRAC above */
- tp = 1000000 * ((prob * 1000) / nsecs);
- mr->cur_tp = MINSTREL_TRUNC(tp);
+ /*
+ * For the throughput calculation, limit the probability value to 90% to
+ * account for collision related packet error rate fluctuation
+ * (prob is scaled - see MINSTREL_FRAC above)
+ */
+ if (prob_ewma > MINSTREL_FRAC(90, 100))
+ return MINSTREL_TRUNC(100000 * ((MINSTREL_FRAC(90, 100) * 1000)
+ / nsecs));
+ else
+ return MINSTREL_TRUNC(100000 * ((prob_ewma * 1000) / nsecs));
}
/*
@@ -387,22 +355,23 @@ static void
minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index,
u16 *tp_list)
{
- int cur_group, cur_idx, cur_thr, cur_prob;
- int tmp_group, tmp_idx, tmp_thr, tmp_prob;
+ int cur_group, cur_idx, cur_tp_avg, cur_prob;
+ int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob;
int j = MAX_THR_RATES;
cur_group = index / MCS_GROUP_RATES;
cur_idx = index % MCS_GROUP_RATES;
- cur_thr = mi->groups[cur_group].rates[cur_idx].cur_tp;
- cur_prob = mi->groups[cur_group].rates[cur_idx].probability;
+ cur_prob = mi->groups[cur_group].rates[cur_idx].prob_ewma;
+ cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, cur_prob);
do {
tmp_group = tp_list[j - 1] / MCS_GROUP_RATES;
tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES;
- tmp_thr = mi->groups[tmp_group].rates[tmp_idx].cur_tp;
- tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability;
- if (cur_thr < tmp_thr ||
- (cur_thr == tmp_thr && cur_prob <= tmp_prob))
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+ tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx,
+ tmp_prob);
+ if (cur_tp_avg < tmp_tp_avg ||
+ (cur_tp_avg == tmp_tp_avg && cur_prob <= tmp_prob))
break;
j--;
} while (j > 0);
@@ -422,16 +391,21 @@ static void
minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
{
struct minstrel_mcs_group_data *mg;
- struct minstrel_rate_stats *mr;
- int tmp_group, tmp_idx, tmp_tp, tmp_prob, max_tp_group;
+ struct minstrel_rate_stats *mrs;
+ int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob;
+ int max_tp_group, cur_tp_avg, cur_group, cur_idx;
+ int max_gpr_group, max_gpr_idx;
+ int max_gpr_tp_avg, max_gpr_prob;
+ cur_group = index / MCS_GROUP_RATES;
+ cur_idx = index % MCS_GROUP_RATES;
mg = &mi->groups[index / MCS_GROUP_RATES];
- mr = &mg->rates[index % MCS_GROUP_RATES];
+ mrs = &mg->rates[index % MCS_GROUP_RATES];
tmp_group = mi->max_prob_rate / MCS_GROUP_RATES;
tmp_idx = mi->max_prob_rate % MCS_GROUP_RATES;
- tmp_tp = mi->groups[tmp_group].rates[tmp_idx].cur_tp;
- tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+ tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
/* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from
* MCS_GROUP as well as CCK_GROUP rates do not allow aggregation */
@@ -440,15 +414,24 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
(max_tp_group != MINSTREL_CCK_GROUP))
return;
- if (mr->probability > MINSTREL_FRAC(75, 100)) {
- if (mr->cur_tp > tmp_tp)
+ if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) {
+ cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx,
+ mrs->prob_ewma);
+ if (cur_tp_avg > tmp_tp_avg)
mi->max_prob_rate = index;
- if (mr->cur_tp > mg->rates[mg->max_group_prob_rate].cur_tp)
+
+ max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
+ max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
+ max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
+ max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group,
+ max_gpr_idx,
+ max_gpr_prob);
+ if (cur_tp_avg > max_gpr_tp_avg)
mg->max_group_prob_rate = index;
} else {
- if (mr->probability > tmp_prob)
+ if (mrs->prob_ewma > tmp_prob)
mi->max_prob_rate = index;
- if (mr->probability > mg->rates[mg->max_group_prob_rate].probability)
+ if (mrs->prob_ewma > mg->rates[mg->max_group_prob_rate].prob_ewma)
mg->max_group_prob_rate = index;
}
}
@@ -465,16 +448,18 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi,
u16 tmp_mcs_tp_rate[MAX_THR_RATES],
u16 tmp_cck_tp_rate[MAX_THR_RATES])
{
- unsigned int tmp_group, tmp_idx, tmp_cck_tp, tmp_mcs_tp;
+ unsigned int tmp_group, tmp_idx, tmp_cck_tp, tmp_mcs_tp, tmp_prob;
int i;
tmp_group = tmp_cck_tp_rate[0] / MCS_GROUP_RATES;
tmp_idx = tmp_cck_tp_rate[0] % MCS_GROUP_RATES;
- tmp_cck_tp = mi->groups[tmp_group].rates[tmp_idx].cur_tp;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+ tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
tmp_group = tmp_mcs_tp_rate[0] / MCS_GROUP_RATES;
tmp_idx = tmp_mcs_tp_rate[0] % MCS_GROUP_RATES;
- tmp_mcs_tp = mi->groups[tmp_group].rates[tmp_idx].cur_tp;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+ tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
if (tmp_cck_tp > tmp_mcs_tp) {
for(i = 0; i < MAX_THR_RATES; i++) {
@@ -493,8 +478,7 @@ static inline void
minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
{
struct minstrel_mcs_group_data *mg;
- struct minstrel_rate_stats *mr;
- int tmp_max_streams, group;
+ int tmp_max_streams, group, tmp_idx, tmp_prob;
int tmp_tp = 0;
tmp_max_streams = minstrel_mcs_groups[mi->max_tp_rate[0] /
@@ -503,11 +487,16 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
mg = &mi->groups[group];
if (!mg->supported || group == MINSTREL_CCK_GROUP)
continue;
- mr = minstrel_get_ratestats(mi, mg->max_group_prob_rate);
- if (tmp_tp < mr->cur_tp &&
+
+ tmp_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
+ tmp_prob = mi->groups[group].rates[tmp_idx].prob_ewma;
+
+ if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) &&
(minstrel_mcs_groups[group].streams < tmp_max_streams)) {
mi->max_prob_rate = mg->max_group_prob_rate;
- tmp_tp = mr->cur_tp;
+ tmp_tp = minstrel_ht_get_tp_avg(mi, group,
+ tmp_idx,
+ tmp_prob);
}
}
}
@@ -525,8 +514,8 @@ static void
minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
{
struct minstrel_mcs_group_data *mg;
- struct minstrel_rate_stats *mr;
- int group, i, j;
+ struct minstrel_rate_stats *mrs;
+ int group, i, j, cur_prob;
u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES];
u16 tmp_cck_tp_rate[MAX_THR_RATES], index;
@@ -565,12 +554,12 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
index = MCS_GROUP_RATES * group + i;
- mr = &mg->rates[i];
- mr->retry_updated = false;
- minstrel_calc_rate_ewma(mr);
- minstrel_ht_calc_tp(mi, group, i);
+ mrs = &mg->rates[i];
+ mrs->retry_updated = false;
+ minstrel_calc_rate_stats(mrs);
+ cur_prob = mrs->prob_ewma;
- if (!mr->cur_tp)
+ if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0)
continue;
/* Find max throughput rate set */
@@ -614,7 +603,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
#endif
/* Reset update timer */
- mi->stats_update = jiffies;
+ mi->last_stats_update = jiffies;
}
static bool
@@ -637,7 +626,7 @@ minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct ieee80211_tx_rate *rat
}
static void
-minstrel_next_sample_idx(struct minstrel_ht_sta *mi)
+minstrel_set_next_sample_idx(struct minstrel_ht_sta *mi)
{
struct minstrel_mcs_group_data *mg;
@@ -778,7 +767,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
update = true;
}
- if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) {
+ if (time_after(jiffies, mi->last_stats_update +
+ (mp->update_interval / 2 * HZ) / 1000)) {
update = true;
minstrel_ht_update_stats(mp, mi);
}
@@ -791,7 +781,7 @@ static void
minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
int index)
{
- struct minstrel_rate_stats *mr;
+ struct minstrel_rate_stats *mrs;
const struct mcs_group *group;
unsigned int tx_time, tx_time_rtscts, tx_time_data;
unsigned int cw = mp->cw_min;
@@ -800,16 +790,16 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len);
unsigned int overhead = 0, overhead_rtscts = 0;
- mr = minstrel_get_ratestats(mi, index);
- if (mr->probability < MINSTREL_FRAC(1, 10)) {
- mr->retry_count = 1;
- mr->retry_count_rtscts = 1;
+ mrs = minstrel_get_ratestats(mi, index);
+ if (mrs->prob_ewma < MINSTREL_FRAC(1, 10)) {
+ mrs->retry_count = 1;
+ mrs->retry_count_rtscts = 1;
return;
}
- mr->retry_count = 2;
- mr->retry_count_rtscts = 2;
- mr->retry_updated = true;
+ mrs->retry_count = 2;
+ mrs->retry_count_rtscts = 2;
+ mrs->retry_updated = true;
group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len / 1000;
@@ -840,9 +830,9 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
tx_time_rtscts += ctime + overhead_rtscts + tx_time_data;
if (tx_time_rtscts < mp->segment_size)
- mr->retry_count_rtscts++;
+ mrs->retry_count_rtscts++;
} while ((tx_time < mp->segment_size) &&
- (++mr->retry_count < mp->max_retry));
+ (++mrs->retry_count < mp->max_retry));
}
@@ -851,22 +841,22 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
struct ieee80211_sta_rates *ratetbl, int offset, int index)
{
const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
- struct minstrel_rate_stats *mr;
+ struct minstrel_rate_stats *mrs;
u8 idx;
u16 flags = group->flags;
- mr = minstrel_get_ratestats(mi, index);
- if (!mr->retry_updated)
+ mrs = minstrel_get_ratestats(mi, index);
+ if (!mrs->retry_updated)
minstrel_calc_retransmit(mp, mi, index);
- if (mr->probability < MINSTREL_FRAC(20, 100) || !mr->retry_count) {
+ if (mrs->prob_ewma < MINSTREL_FRAC(20, 100) || !mrs->retry_count) {
ratetbl->rate[offset].count = 2;
ratetbl->rate[offset].count_rts = 2;
ratetbl->rate[offset].count_cts = 2;
} else {
- ratetbl->rate[offset].count = mr->retry_count;
- ratetbl->rate[offset].count_cts = mr->retry_count;
- ratetbl->rate[offset].count_rts = mr->retry_count_rtscts;
+ ratetbl->rate[offset].count = mrs->retry_count;
+ ratetbl->rate[offset].count_cts = mrs->retry_count;
+ ratetbl->rate[offset].count_rts = mrs->retry_count_rtscts;
}
if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP)
@@ -924,7 +914,7 @@ minstrel_get_duration(int index)
static int
minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
{
- struct minstrel_rate_stats *mr;
+ struct minstrel_rate_stats *mrs;
struct minstrel_mcs_group_data *mg;
unsigned int sample_dur, sample_group, cur_max_tp_streams;
int sample_idx = 0;
@@ -940,12 +930,12 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
sample_group = mi->sample_group;
mg = &mi->groups[sample_group];
sample_idx = sample_table[mg->column][mg->index];
- minstrel_next_sample_idx(mi);
+ minstrel_set_next_sample_idx(mi);
if (!(mg->supported & BIT(sample_idx)))
return -1;
- mr = &mg->rates[sample_idx];
+ mrs = &mg->rates[sample_idx];
sample_idx += sample_group * MCS_GROUP_RATES;
/*
@@ -962,7 +952,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
* Do not sample if the probability is already higher than 95%
* to avoid wasting airtime.
*/
- if (mr->probability > MINSTREL_FRAC(95, 100))
+ if (mrs->prob_ewma > MINSTREL_FRAC(95, 100))
return -1;
/*
@@ -977,7 +967,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
(cur_max_tp_streams - 1 <
minstrel_mcs_groups[sample_group].streams ||
sample_dur >= minstrel_get_duration(mi->max_prob_rate))) {
- if (mr->sample_skipped < 20)
+ if (mrs->sample_skipped < 20)
return -1;
if (mi->sample_slow++ > 2)
@@ -1131,7 +1121,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
memset(mi, 0, sizeof(*mi));
mi->sta = sta;
- mi->stats_update = jiffies;
+ mi->last_stats_update = jiffies;
ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1, 0);
mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1, 0);
@@ -1328,16 +1318,19 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
{
struct minstrel_ht_sta_priv *msp = priv_sta;
struct minstrel_ht_sta *mi = &msp->ht;
- int i, j;
+ int i, j, prob, tp_avg;
if (!msp->is_ht)
return mac80211_minstrel.get_expected_throughput(priv_sta);
i = mi->max_tp_rate[0] / MCS_GROUP_RATES;
j = mi->max_tp_rate[0] % MCS_GROUP_RATES;
+ prob = mi->groups[i].rates[j].prob_ewma;
+
+ /* convert tp_avg from pkt per second in kbps */
+ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * AVG_PKT_SIZE * 8 / 1024;
- /* convert cur_tp from pkt per second in kbps */
- return mi->groups[i].rates[j].cur_tp * AVG_PKT_SIZE * 8 / 1024;
+ return tp_avg;
}
static const struct rate_control_ops mac80211_minstrel_ht = {
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index f2217d6aa0c2..e8b52a94d24b 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -78,7 +78,7 @@ struct minstrel_ht_sta {
u16 max_prob_rate;
/* time of last status update */
- unsigned long stats_update;
+ unsigned long last_stats_update;
/* overhead time in usec for each frame */
unsigned int overhead;
@@ -112,6 +112,7 @@ struct minstrel_ht_sta_priv {
};
#ifdef CONFIG_MAC80211_DEBUGFS
struct dentry *dbg_stats;
+ struct dentry *dbg_stats_csv;
#endif
void *ratelist;
void *sample_table;
@@ -120,5 +121,7 @@ struct minstrel_ht_sta_priv {
void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
void minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta);
+int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
+ int prob_ewma);
#endif
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 20c676b8e5b6..6822ce0f95e5 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -19,7 +19,7 @@ static char *
minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
{
const struct mcs_group *mg;
- unsigned int j, tp, prob, eprob;
+ unsigned int j, tp_max, tp_avg, prob, eprob, tx_time;
char htmode = '2';
char gimode = 'L';
u32 gflags;
@@ -38,19 +38,26 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
gimode = 'S';
for (j = 0; j < MCS_GROUP_RATES; j++) {
- struct minstrel_rate_stats *mr = &mi->groups[i].rates[j];
+ struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j];
static const int bitrates[4] = { 10, 20, 55, 110 };
int idx = i * MCS_GROUP_RATES + j;
if (!(mi->groups[i].supported & BIT(j)))
continue;
- if (gflags & IEEE80211_TX_RC_MCS)
- p += sprintf(p, " HT%c0/%cGI ", htmode, gimode);
- else if (gflags & IEEE80211_TX_RC_VHT_MCS)
- p += sprintf(p, "VHT%c0/%cGI ", htmode, gimode);
- else
- p += sprintf(p, " CCK/%cP ", j < 4 ? 'L' : 'S');
+ if (gflags & IEEE80211_TX_RC_MCS) {
+ p += sprintf(p, "HT%c0 ", htmode);
+ p += sprintf(p, "%cGI ", gimode);
+ p += sprintf(p, "%d ", mg->streams);
+ } else if (gflags & IEEE80211_TX_RC_VHT_MCS) {
+ p += sprintf(p, "VHT%c0 ", htmode);
+ p += sprintf(p, "%cGI ", gimode);
+ p += sprintf(p, "%d ", mg->streams);
+ } else {
+ p += sprintf(p, "CCK ");
+ p += sprintf(p, "%cP ", j < 4 ? 'L' : 'S');
+ p += sprintf(p, "1 ");
+ }
*(p++) = (idx == mi->max_tp_rate[0]) ? 'A' : ' ';
*(p++) = (idx == mi->max_tp_rate[1]) ? 'B' : ' ';
@@ -59,29 +66,39 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
*(p++) = (idx == mi->max_prob_rate) ? 'P' : ' ';
if (gflags & IEEE80211_TX_RC_MCS) {
- p += sprintf(p, " MCS%-2u ", (mg->streams - 1) * 8 + j);
+ p += sprintf(p, " MCS%-2u", (mg->streams - 1) * 8 + j);
} else if (gflags & IEEE80211_TX_RC_VHT_MCS) {
- p += sprintf(p, " MCS%-1u/%1u", j, mg->streams);
+ p += sprintf(p, " MCS%-1u/%1u", j, mg->streams);
} else {
int r = bitrates[j % 4];
- p += sprintf(p, " %2u.%1uM ", r / 10, r % 10);
+ p += sprintf(p, " %2u.%1uM", r / 10, r % 10);
}
- tp = mr->cur_tp / 10;
- prob = MINSTREL_TRUNC(mr->cur_prob * 1000);
- eprob = MINSTREL_TRUNC(mr->probability * 1000);
+ p += sprintf(p, " %3u ", idx);
- p += sprintf(p, " %4u.%1u %3u.%1u %3u.%1u "
- "%3u %4u(%4u) %9llu(%9llu)\n",
- tp / 10, tp % 10,
+ /* tx_time[rate(i)] in usec */
+ tx_time = DIV_ROUND_CLOSEST(mg->duration[j], 1000);
+ p += sprintf(p, "%6u ", tx_time);
+
+ tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100));
+ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma);
+ prob = MINSTREL_TRUNC(mrs->cur_prob * 1000);
+ eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+
+ p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u"
+ " %3u.%1u %3u %3u %-3u "
+ "%9llu %-9llu\n",
+ tp_max / 10, tp_max % 10,
+ tp_avg / 10, tp_avg % 10,
eprob / 10, eprob % 10,
+ mrs->prob_ewmsd / 10, mrs->prob_ewmsd % 10,
prob / 10, prob % 10,
- mr->retry_count,
- mr->last_success,
- mr->last_attempts,
- (unsigned long long)mr->succ_hist,
- (unsigned long long)mr->att_hist);
+ mrs->retry_count,
+ mrs->last_success,
+ mrs->last_attempts,
+ (unsigned long long)mrs->succ_hist,
+ (unsigned long long)mrs->att_hist);
}
return p;
@@ -94,8 +111,8 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file)
struct minstrel_ht_sta *mi = &msp->ht;
struct minstrel_debugfs_info *ms;
unsigned int i;
- char *p;
int ret;
+ char *p;
if (!msp->is_ht) {
inode->i_private = &msp->legacy;
@@ -110,8 +127,14 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file)
file->private_data = ms;
p = ms->buf;
- p += sprintf(p, " type rate tpt eprob *prob "
- "ret *ok(*cum) ok( cum)\n");
+
+ p += sprintf(p, "\n");
+ p += sprintf(p, " best ____________rate__________ "
+ "______statistics______ ________last_______ "
+ "______sum-of________\n");
+ p += sprintf(p, "mode guard # rate [name idx airtime max_tp] "
+ "[ ø(tp) ø(prob) sd(prob)] [prob.|retry|suc|att] [#success | "
+ "#attempts]\n");
p = minstrel_ht_stats_dump(mi, MINSTREL_CCK_GROUP, p);
for (i = 0; i < MINSTREL_CCK_GROUP; i++)
@@ -123,11 +146,10 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file)
"lookaround %d\n",
max(0, (int) mi->total_packets - (int) mi->sample_packets),
mi->sample_packets);
- p += sprintf(p, "Average A-MPDU length: %d.%d\n",
+ p += sprintf(p, "Average # of aggregated frames per A-MPDU: %d.%d\n",
MINSTREL_TRUNC(mi->avg_ampdu_len),
MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10);
ms->len = p - ms->buf;
-
WARN_ON(ms->len + sizeof(*ms) > 32768);
return nonseekable_open(inode, file);
@@ -141,6 +163,143 @@ static const struct file_operations minstrel_ht_stat_fops = {
.llseek = no_llseek,
};
+static char *
+minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p)
+{
+ const struct mcs_group *mg;
+ unsigned int j, tp_max, tp_avg, prob, eprob, tx_time;
+ char htmode = '2';
+ char gimode = 'L';
+ u32 gflags;
+
+ if (!mi->groups[i].supported)
+ return p;
+
+ mg = &minstrel_mcs_groups[i];
+ gflags = mg->flags;
+
+ if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH)
+ htmode = '4';
+ else if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH)
+ htmode = '8';
+ if (gflags & IEEE80211_TX_RC_SHORT_GI)
+ gimode = 'S';
+
+ for (j = 0; j < MCS_GROUP_RATES; j++) {
+ struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j];
+ static const int bitrates[4] = { 10, 20, 55, 110 };
+ int idx = i * MCS_GROUP_RATES + j;
+
+ if (!(mi->groups[i].supported & BIT(j)))
+ continue;
+
+ if (gflags & IEEE80211_TX_RC_MCS) {
+ p += sprintf(p, "HT%c0,", htmode);
+ p += sprintf(p, "%cGI,", gimode);
+ p += sprintf(p, "%d,", mg->streams);
+ } else if (gflags & IEEE80211_TX_RC_VHT_MCS) {
+ p += sprintf(p, "VHT%c0,", htmode);
+ p += sprintf(p, "%cGI,", gimode);
+ p += sprintf(p, "%d,", mg->streams);
+ } else {
+ p += sprintf(p, "CCK,");
+ p += sprintf(p, "%cP,", j < 4 ? 'L' : 'S');
+ p += sprintf(p, "1,");
+ }
+
+ p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[0]) ? "A" : ""));
+ p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[1]) ? "B" : ""));
+ p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[2]) ? "C" : ""));
+ p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[3]) ? "D" : ""));
+ p += sprintf(p, "%s" ,((idx == mi->max_prob_rate) ? "P" : ""));
+
+ if (gflags & IEEE80211_TX_RC_MCS) {
+ p += sprintf(p, ",MCS%-2u,", (mg->streams - 1) * 8 + j);
+ } else if (gflags & IEEE80211_TX_RC_VHT_MCS) {
+ p += sprintf(p, ",MCS%-1u/%1u,", j, mg->streams);
+ } else {
+ int r = bitrates[j % 4];
+ p += sprintf(p, ",%2u.%1uM,", r / 10, r % 10);
+ }
+
+ p += sprintf(p, "%u,", idx);
+ tx_time = DIV_ROUND_CLOSEST(mg->duration[j], 1000);
+ p += sprintf(p, "%u,", tx_time);
+
+ tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100));
+ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma);
+ prob = MINSTREL_TRUNC(mrs->cur_prob * 1000);
+ eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+
+ p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u.%u,%u.%u,%u,%u,"
+ "%u,%llu,%llu,",
+ tp_max / 10, tp_max % 10,
+ tp_avg / 10, tp_avg % 10,
+ eprob / 10, eprob % 10,
+ mrs->prob_ewmsd / 10, mrs->prob_ewmsd % 10,
+ prob / 10, prob % 10,
+ mrs->retry_count,
+ mrs->last_success,
+ mrs->last_attempts,
+ (unsigned long long)mrs->succ_hist,
+ (unsigned long long)mrs->att_hist);
+ p += sprintf(p, "%d,%d,%d.%d\n",
+ max(0, (int) mi->total_packets -
+ (int) mi->sample_packets),
+ mi->sample_packets,
+ MINSTREL_TRUNC(mi->avg_ampdu_len),
+ MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10);
+ }
+
+ return p;
+}
+
+static int
+minstrel_ht_stats_csv_open(struct inode *inode, struct file *file)
+{
+ struct minstrel_ht_sta_priv *msp = inode->i_private;
+ struct minstrel_ht_sta *mi = &msp->ht;
+ struct minstrel_debugfs_info *ms;
+ unsigned int i;
+ int ret;
+ char *p;
+
+ if (!msp->is_ht) {
+ inode->i_private = &msp->legacy;
+ ret = minstrel_stats_csv_open(inode, file);
+ inode->i_private = msp;
+ return ret;
+ }
+
+ ms = kmalloc(32768, GFP_KERNEL);
+
+ if (!ms)
+ return -ENOMEM;
+
+ file->private_data = ms;
+
+ p = ms->buf;
+
+ p = minstrel_ht_stats_csv_dump(mi, MINSTREL_CCK_GROUP, p);
+ for (i = 0; i < MINSTREL_CCK_GROUP; i++)
+ p = minstrel_ht_stats_csv_dump(mi, i, p);
+ for (i++; i < ARRAY_SIZE(mi->groups); i++)
+ p = minstrel_ht_stats_csv_dump(mi, i, p);
+
+ ms->len = p - ms->buf;
+ WARN_ON(ms->len + sizeof(*ms) > 32768);
+
+ return nonseekable_open(inode, file);
+}
+
+static const struct file_operations minstrel_ht_stat_csv_fops = {
+ .owner = THIS_MODULE,
+ .open = minstrel_ht_stats_csv_open,
+ .read = minstrel_stats_read,
+ .release = minstrel_stats_release,
+ .llseek = no_llseek,
+};
+
void
minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
{
@@ -148,6 +307,8 @@ minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
msp->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, msp,
&minstrel_ht_stat_fops);
+ msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO,
+ dir, msp, &minstrel_ht_stat_csv_fops);
}
void
@@ -156,4 +317,5 @@ minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta)
struct minstrel_ht_sta_priv *msp = priv_sta;
debugfs_remove(msp->dbg_stats);
+ debugfs_remove(msp->dbg_stats_csv);
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2cd02278d4d4..260eed45b6d2 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1185,6 +1185,7 @@ static void sta_ps_start(struct sta_info *sta)
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
struct ps_data *ps;
+ int tid;
if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
@@ -1198,6 +1199,18 @@ static void sta_ps_start(struct sta_info *sta)
drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta);
ps_dbg(sdata, "STA %pM aid %d enters power save mode\n",
sta->sta.addr, sta->sta.aid);
+
+ if (!sta->sta.txq[0])
+ return;
+
+ for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
+ struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
+
+ if (!skb_queue_len(&txqi->queue))
+ set_bit(tid, &sta->txq_buffered_tids);
+ else
+ clear_bit(tid, &sta->txq_buffered_tids);
+ }
}
static void sta_ps_end(struct sta_info *sta)
@@ -3424,7 +3437,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
__le16 fc;
struct ieee80211_rx_data rx;
struct ieee80211_sub_if_data *prev;
- struct sta_info *sta, *tmp, *prev_sta;
+ struct sta_info *sta, *prev_sta;
+ struct rhash_head *tmp;
int err = 0;
fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
@@ -3459,9 +3473,13 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
ieee80211_scan_rx(local, skb);
if (ieee80211_is_data(fc)) {
+ const struct bucket_table *tbl;
+
prev_sta = NULL;
- for_each_sta_info(local, hdr->addr2, sta, tmp) {
+ tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
+
+ for_each_sta_info(local, tbl, hdr->addr2, sta, tmp) {
if (!prev_sta) {
prev_sta = sta;
continue;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index aacaa1a85e63..12971b71d0fa 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -64,32 +64,20 @@
* freed before they are done using it.
*/
+static const struct rhashtable_params sta_rht_params = {
+ .nelem_hint = 3, /* start small */
+ .head_offset = offsetof(struct sta_info, hash_node),
+ .key_offset = offsetof(struct sta_info, sta.addr),
+ .key_len = ETH_ALEN,
+ .hashfn = sta_addr_hash,
+};
+
/* Caller must hold local->sta_mtx */
static int sta_info_hash_del(struct ieee80211_local *local,
struct sta_info *sta)
{
- struct sta_info *s;
-
- s = rcu_dereference_protected(local->sta_hash[STA_HASH(sta->sta.addr)],
- lockdep_is_held(&local->sta_mtx));
- if (!s)
- return -ENOENT;
- if (s == sta) {
- rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)],
- s->hnext);
- return 0;
- }
-
- while (rcu_access_pointer(s->hnext) &&
- rcu_access_pointer(s->hnext) != sta)
- s = rcu_dereference_protected(s->hnext,
- lockdep_is_held(&local->sta_mtx));
- if (rcu_access_pointer(s->hnext)) {
- rcu_assign_pointer(s->hnext, sta->hnext);
- return 0;
- }
-
- return -ENOENT;
+ return rhashtable_remove_fast(&local->sta_hash, &sta->hash_node,
+ sta_rht_params);
}
static void __cleanup_single_sta(struct sta_info *sta)
@@ -118,6 +106,16 @@ static void __cleanup_single_sta(struct sta_info *sta)
atomic_dec(&ps->num_sta_ps);
}
+ if (sta->sta.txq[0]) {
+ for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
+ struct txq_info *txqi = to_txq_info(sta->sta.txq[i]);
+ int n = skb_queue_len(&txqi->queue);
+
+ ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
+ atomic_sub(n, &sdata->txqs_len[txqi->txq.ac]);
+ }
+ }
+
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]);
ieee80211_purge_tx_queue(&local->hw, &sta->ps_tx_buf[ac]);
@@ -159,18 +157,8 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata,
const u8 *addr)
{
struct ieee80211_local *local = sdata->local;
- struct sta_info *sta;
- sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)],
- lockdep_is_held(&local->sta_mtx));
- while (sta) {
- if (sta->sdata == sdata &&
- ether_addr_equal(sta->sta.addr, addr))
- break;
- sta = rcu_dereference_check(sta->hnext,
- lockdep_is_held(&local->sta_mtx));
- }
- return sta;
+ return rhashtable_lookup_fast(&local->sta_hash, addr, sta_rht_params);
}
/*
@@ -182,18 +170,24 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
+ struct rhash_head *tmp;
+ const struct bucket_table *tbl;
- sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)],
- lockdep_is_held(&local->sta_mtx));
- while (sta) {
- if ((sta->sdata == sdata ||
- (sta->sdata->bss && sta->sdata->bss == sdata->bss)) &&
- ether_addr_equal(sta->sta.addr, addr))
- break;
- sta = rcu_dereference_check(sta->hnext,
- lockdep_is_held(&local->sta_mtx));
+ rcu_read_lock();
+ tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
+
+ for_each_sta_info(local, tbl, addr, sta, tmp) {
+ if (sta->sdata == sdata ||
+ (sta->sdata->bss && sta->sdata->bss == sdata->bss)) {
+ rcu_read_unlock();
+ /* this is safe as the caller must already hold
+ * another rcu read section or the mutex
+ */
+ return sta;
+ }
}
- return sta;
+ rcu_read_unlock();
+ return NULL;
}
struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata,
@@ -234,6 +228,8 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr);
+ if (sta->sta.txq[0])
+ kfree(to_txq_info(sta->sta.txq[0]));
kfree(rcu_dereference_raw(sta->sta.rates));
kfree(sta);
}
@@ -242,9 +238,8 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
static void sta_info_hash_add(struct ieee80211_local *local,
struct sta_info *sta)
{
- lockdep_assert_held(&local->sta_mtx);
- sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)];
- rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta);
+ rhashtable_insert_fast(&local->sta_hash, &sta->hash_node,
+ sta_rht_params);
}
static void sta_deliver_ps_frames(struct work_struct *wk)
@@ -285,11 +280,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
const u8 *addr, gfp_t gfp)
{
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_hw *hw = &local->hw;
struct sta_info *sta;
struct timespec uptime;
int i;
- sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp);
+ sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp);
if (!sta)
return NULL;
@@ -321,11 +317,25 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++)
ewma_init(&sta->chain_signal_avg[i], 1024, 8);
- if (sta_prepare_rate_control(local, sta, gfp)) {
- kfree(sta);
- return NULL;
+ if (local->ops->wake_tx_queue) {
+ void *txq_data;
+ int size = sizeof(struct txq_info) +
+ ALIGN(hw->txq_data_size, sizeof(void *));
+
+ txq_data = kcalloc(ARRAY_SIZE(sta->sta.txq), size, gfp);
+ if (!txq_data)
+ goto free;
+
+ for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
+ struct txq_info *txq = txq_data + i * size;
+
+ ieee80211_init_tx_queue(sdata, sta, txq, i);
+ }
}
+ if (sta_prepare_rate_control(local, sta, gfp))
+ goto free_txq;
+
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
/*
* timer_to_tid must be initialized with identity mapping
@@ -346,7 +356,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
if (sdata->vif.type == NL80211_IFTYPE_AP ||
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
struct ieee80211_supported_band *sband =
- local->hw.wiphy->bands[ieee80211_get_sdata_band(sdata)];
+ hw->wiphy->bands[ieee80211_get_sdata_band(sdata)];
u8 smps = (sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >>
IEEE80211_HT_CAP_SM_PS_SHIFT;
/*
@@ -371,6 +381,13 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr);
return sta;
+
+free_txq:
+ if (sta->sta.txq[0])
+ kfree(to_txq_info(sta->sta.txq[0]));
+free:
+ kfree(sta);
+ return NULL;
}
static int sta_info_insert_check(struct sta_info *sta)
@@ -640,6 +657,8 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
indicate_tim |=
sta->driver_buffered_tids & tids;
+ indicate_tim |=
+ sta->txq_buffered_tids & tids;
}
done:
@@ -948,19 +967,32 @@ static void sta_info_cleanup(unsigned long data)
round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL));
}
-void sta_info_init(struct ieee80211_local *local)
+u32 sta_addr_hash(const void *key, u32 length, u32 seed)
+{
+ return jhash(key, ETH_ALEN, seed);
+}
+
+int sta_info_init(struct ieee80211_local *local)
{
+ int err;
+
+ err = rhashtable_init(&local->sta_hash, &sta_rht_params);
+ if (err)
+ return err;
+
spin_lock_init(&local->tim_lock);
mutex_init(&local->sta_mtx);
INIT_LIST_HEAD(&local->sta_list);
setup_timer(&local->sta_cleanup, sta_info_cleanup,
(unsigned long)local);
+ return 0;
}
void sta_info_stop(struct ieee80211_local *local)
{
del_timer_sync(&local->sta_cleanup);
+ rhashtable_destroy(&local->sta_hash);
}
@@ -1024,16 +1056,21 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
}
struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw,
- const u8 *addr,
- const u8 *localaddr)
+ const u8 *addr,
+ const u8 *localaddr)
{
- struct sta_info *sta, *nxt;
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct sta_info *sta;
+ struct rhash_head *tmp;
+ const struct bucket_table *tbl;
+
+ tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
/*
* Just return a random station if localaddr is NULL
* ... first in list.
*/
- for_each_sta_info(hw_to_local(hw), addr, sta, nxt) {
+ for_each_sta_info(local, tbl, addr, sta, tmp) {
if (localaddr &&
!ether_addr_equal(sta->sdata->vif.addr, localaddr))
continue;
@@ -1071,7 +1108,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
struct sk_buff_head pending;
- int filtered = 0, buffered = 0, ac;
+ int filtered = 0, buffered = 0, ac, i;
unsigned long flags;
struct ps_data *ps;
@@ -1090,10 +1127,22 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
BUILD_BUG_ON(BITS_TO_LONGS(IEEE80211_NUM_TIDS) > 1);
sta->driver_buffered_tids = 0;
+ sta->txq_buffered_tids = 0;
if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta);
+ if (sta->sta.txq[0]) {
+ for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
+ struct txq_info *txqi = to_txq_info(sta->sta.txq[i]);
+
+ if (!skb_queue_len(&txqi->queue))
+ continue;
+
+ drv_wake_tx_queue(local, txqi);
+ }
+ }
+
skb_queue_head_init(&pending);
/* sync with ieee80211_tx_h_unicast_ps_buf */
@@ -1275,8 +1324,10 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
/* if we already have frames from software, then we can't also
* release from hardware queues
*/
- if (skb_queue_empty(&frames))
+ if (skb_queue_empty(&frames)) {
driver_release_tids |= sta->driver_buffered_tids & tids;
+ driver_release_tids |= sta->txq_buffered_tids & tids;
+ }
if (driver_release_tids) {
/* If the driver has data on more than one TID then
@@ -1447,6 +1498,9 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
sta_info_recalc_tim(sta);
} else {
+ unsigned long tids = sta->txq_buffered_tids & driver_release_tids;
+ int tid;
+
/*
* We need to release a frame that is buffered somewhere in the
* driver ... it'll have to handle that.
@@ -1466,8 +1520,22 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
* that the TID(s) became empty before returning here from the
* release function.
* Either way, however, when the driver tells us that the TID(s)
- * became empty we'll do the TIM recalculation.
+ * became empty or we find that a txq became empty, we'll do the
+ * TIM recalculation.
*/
+
+ if (!sta->sta.txq[0])
+ return;
+
+ for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
+ struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
+
+ if (!(tids & BIT(tid)) || skb_queue_len(&txqi->queue))
+ continue;
+
+ sta_info_recalc_tim(sta);
+ break;
+ }
}
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 7e2fa4018d41..5c164fb3f6c5 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -16,6 +16,7 @@
#include <linux/workqueue.h>
#include <linux/average.h>
#include <linux/etherdevice.h>
+#include <linux/rhashtable.h>
#include "key.h"
/**
@@ -248,7 +249,7 @@ struct sta_ampdu_mlme {
*
* @list: global linked list entry
* @free_list: list entry for keeping track of stations to free
- * @hnext: hash table linked list pointer
+ * @hash_node: hash node for rhashtable
* @local: pointer to the global information
* @sdata: virtual interface this station belongs to
* @ptk: peer keys negotiated with this station, if any
@@ -276,6 +277,7 @@ struct sta_ampdu_mlme {
* entered power saving state, these are also delivered to
* the station when it leaves powersave or polls for frames
* @driver_buffered_tids: bitmap of TIDs the driver has data buffered on
+ * @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on
* @rx_packets: Number of MSDUs received from this STA
* @rx_bytes: Number of bytes received from this STA
* @last_rx: time (in jiffies) when last frame was received from this STA
@@ -341,7 +343,7 @@ struct sta_info {
/* General information, mostly static */
struct list_head list, free_list;
struct rcu_head rcu_head;
- struct sta_info __rcu *hnext;
+ struct rhash_head hash_node;
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS];
@@ -370,6 +372,7 @@ struct sta_info {
struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS];
struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS];
unsigned long driver_buffered_tids;
+ unsigned long txq_buffered_tids;
/* Updated from RX path only, no locking requirements */
unsigned long rx_packets;
@@ -537,10 +540,6 @@ rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid)
lockdep_is_held(&sta->ampdu_mlme.mtx));
}
-#define STA_HASH_SIZE 256
-#define STA_HASH(sta) (sta[5])
-
-
/* Maximum number of frames to buffer per power saving station per AC */
#define STA_MAX_TX_BUFFER 64
@@ -561,26 +560,15 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata,
const u8 *addr);
-static inline
-void for_each_sta_info_type_check(struct ieee80211_local *local,
- const u8 *addr,
- struct sta_info *sta,
- struct sta_info *nxt)
-{
-}
+u32 sta_addr_hash(const void *key, u32 length, u32 seed);
+
+#define _sta_bucket_idx(_tbl, _a) \
+ rht_bucket_index(_tbl, sta_addr_hash(_a, ETH_ALEN, (_tbl)->hash_rnd))
-#define for_each_sta_info(local, _addr, _sta, nxt) \
- for ( /* initialise loop */ \
- _sta = rcu_dereference(local->sta_hash[STA_HASH(_addr)]),\
- nxt = _sta ? rcu_dereference(_sta->hnext) : NULL; \
- /* typecheck */ \
- for_each_sta_info_type_check(local, (_addr), _sta, nxt),\
- /* continue condition */ \
- _sta; \
- /* advance loop */ \
- _sta = nxt, \
- nxt = _sta ? rcu_dereference(_sta->hnext) : NULL \
- ) \
+#define for_each_sta_info(local, tbl, _addr, _sta, _tmp) \
+ rht_for_each_entry_rcu(_sta, _tmp, tbl, \
+ _sta_bucket_idx(tbl, _addr), \
+ hash_node) \
/* compare address and run code only if it matches */ \
if (ether_addr_equal(_sta->sta.addr, (_addr)))
@@ -617,7 +605,7 @@ int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata,
void sta_info_recalc_tim(struct sta_info *sta);
-void sta_info_init(struct ieee80211_local *local);
+int sta_info_init(struct ieee80211_local *local);
void sta_info_stop(struct ieee80211_local *local);
/**
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 2c51742428d5..005fdbe39a8b 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -654,7 +654,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
struct ieee80211_supported_band *sband;
struct ieee80211_sub_if_data *sdata;
struct net_device *prev_dev = NULL;
- struct sta_info *sta, *tmp;
+ struct sta_info *sta;
+ struct rhash_head *tmp;
int retry_count;
int rates_idx;
bool send_to_cooked;
@@ -663,6 +664,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
int rtap_len;
int shift = 0;
int tid = IEEE80211_NUM_TIDS;
+ const struct bucket_table *tbl;
rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count);
@@ -671,7 +673,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
sband = local->hw.wiphy->bands[info->band];
fc = hdr->frame_control;
- for_each_sta_info(local, hdr->addr1, sta, tmp) {
+ tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
+
+ for_each_sta_info(local, tbl, hdr->addr1, sta, tmp) {
/* skip wrong virtual interface */
if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr))
continue;
diff --git a/net/mac80211/trace.c b/net/mac80211/trace.c
index 386e45d8a958..edfe0c170a1c 100644
--- a/net/mac80211/trace.c
+++ b/net/mac80211/trace.c
@@ -8,6 +8,7 @@
#include "debug.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
+#include "trace_msg.h"
#ifdef CONFIG_MAC80211_MESSAGE_TRACING
void __sdata_info(const char *fmt, ...)
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index e9e462b349e5..4c2e7690226a 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2312,43 +2312,36 @@ TRACE_EVENT(drv_tdls_recv_channel_switch,
)
);
-#ifdef CONFIG_MAC80211_MESSAGE_TRACING
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM mac80211_msg
-
-#define MAX_MSG_LEN 100
-
-DECLARE_EVENT_CLASS(mac80211_msg_event,
- TP_PROTO(struct va_format *vaf),
+TRACE_EVENT(drv_wake_tx_queue,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct txq_info *txq),
- TP_ARGS(vaf),
+ TP_ARGS(local, sdata, txq),
TP_STRUCT__entry(
- __dynamic_array(char, msg, MAX_MSG_LEN)
+ LOCAL_ENTRY
+ VIF_ENTRY
+ STA_ENTRY
+ __field(u8, ac)
+ __field(u8, tid)
),
TP_fast_assign(
- WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
- MAX_MSG_LEN, vaf->fmt,
- *vaf->va) >= MAX_MSG_LEN);
- ),
+ struct ieee80211_sta *sta = txq->txq.sta;
- TP_printk("%s", __get_str(msg))
-);
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ STA_ASSIGN;
+ __entry->ac = txq->txq.ac;
+ __entry->tid = txq->txq.tid;
+ ),
-DEFINE_EVENT(mac80211_msg_event, mac80211_info,
- TP_PROTO(struct va_format *vaf),
- TP_ARGS(vaf)
-);
-DEFINE_EVENT(mac80211_msg_event, mac80211_dbg,
- TP_PROTO(struct va_format *vaf),
- TP_ARGS(vaf)
-);
-DEFINE_EVENT(mac80211_msg_event, mac80211_err,
- TP_PROTO(struct va_format *vaf),
- TP_ARGS(vaf)
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ac:%d tid:%d",
+ LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ac, __entry->tid
+ )
);
-#endif
#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h
new file mode 100644
index 000000000000..768f7c22a190
--- /dev/null
+++ b/net/mac80211/trace_msg.h
@@ -0,0 +1,53 @@
+#ifdef CONFIG_MAC80211_MESSAGE_TRACING
+
+#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
+#define __MAC80211_MSG_DRIVER_TRACE
+
+#include <linux/tracepoint.h>
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mac80211_msg
+
+#define MAX_MSG_LEN 100
+
+DECLARE_EVENT_CLASS(mac80211_msg_event,
+ TP_PROTO(struct va_format *vaf),
+
+ TP_ARGS(vaf),
+
+ TP_STRUCT__entry(
+ __dynamic_array(char, msg, MAX_MSG_LEN)
+ ),
+
+ TP_fast_assign(
+ WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
+ MAX_MSG_LEN, vaf->fmt,
+ *vaf->va) >= MAX_MSG_LEN);
+ ),
+
+ TP_printk("%s", __get_str(msg))
+);
+
+DEFINE_EVENT(mac80211_msg_event, mac80211_info,
+ TP_PROTO(struct va_format *vaf),
+ TP_ARGS(vaf)
+);
+DEFINE_EVENT(mac80211_msg_event, mac80211_dbg,
+ TP_PROTO(struct va_format *vaf),
+ TP_ARGS(vaf)
+);
+DEFINE_EVENT(mac80211_msg_event, mac80211_err,
+ TP_PROTO(struct va_format *vaf),
+ TP_ARGS(vaf)
+);
+#endif /* !__MAC80211_MSG_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace_msg
+#include <trace/define_trace.h>
+
+#endif
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 9f7fb4eec37b..667111ee6a20 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -767,12 +767,22 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
return TX_CONTINUE;
}
+static __le16 ieee80211_tx_next_seq(struct sta_info *sta, int tid)
+{
+ u16 *seq = &sta->tid_seq[tid];
+ __le16 ret = cpu_to_le16(*seq);
+
+ /* Increase the sequence number. */
+ *seq = (*seq + 0x10) & IEEE80211_SCTL_SEQ;
+
+ return ret;
+}
+
static ieee80211_tx_result debug_noinline
ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
- u16 *seq;
u8 *qc;
int tid;
@@ -823,13 +833,10 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
qc = ieee80211_get_qos_ctl(hdr);
tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
- seq = &tx->sta->tid_seq[tid];
tx->sta->tx_msdu[tid]++;
- hdr->seq_ctrl = cpu_to_le16(*seq);
-
- /* Increase the sequence number. */
- *seq = (*seq + 0x10) & IEEE80211_SCTL_SEQ;
+ if (!tx->sta->sta.txq[0])
+ hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
return TX_CONTINUE;
}
@@ -1070,7 +1077,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
* nothing -- this aggregation session is being started
* but that might still fail with the driver
*/
- } else {
+ } else if (!tx->sta->sta.txq[tid]) {
spin_lock(&tx->sta->lock);
/*
* Need to re-check now, because we may get here
@@ -1211,13 +1218,102 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
return TX_CONTINUE;
}
+static void ieee80211_drv_tx(struct ieee80211_local *local,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *pubsta,
+ struct sk_buff *skb)
+{
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_tx_control control = {
+ .sta = pubsta,
+ };
+ struct ieee80211_txq *txq = NULL;
+ struct txq_info *txqi;
+ u8 ac;
+
+ if (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)
+ goto tx_normal;
+
+ if (!ieee80211_is_data(hdr->frame_control))
+ goto tx_normal;
+
+ if (pubsta) {
+ u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+
+ txq = pubsta->txq[tid];
+ } else if (vif) {
+ txq = vif->txq;
+ }
+
+ if (!txq)
+ goto tx_normal;
+
+ ac = txq->ac;
+ txqi = to_txq_info(txq);
+ atomic_inc(&sdata->txqs_len[ac]);
+ if (atomic_read(&sdata->txqs_len[ac]) >= local->hw.txq_ac_max_pending)
+ netif_stop_subqueue(sdata->dev, ac);
+
+ skb_queue_tail(&txqi->queue, skb);
+ drv_wake_tx_queue(local, txqi);
+
+ return;
+
+tx_normal:
+ drv_tx(local, &control, skb);
+}
+
+struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif);
+ struct txq_info *txqi = container_of(txq, struct txq_info, txq);
+ struct ieee80211_hdr *hdr;
+ struct sk_buff *skb = NULL;
+ u8 ac = txq->ac;
+
+ spin_lock_bh(&txqi->queue.lock);
+
+ if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
+ goto out;
+
+ skb = __skb_dequeue(&txqi->queue);
+ if (!skb)
+ goto out;
+
+ atomic_dec(&sdata->txqs_len[ac]);
+ if (__netif_subqueue_stopped(sdata->dev, ac))
+ ieee80211_propagate_queue_wake(local, sdata->vif.hw_queue[ac]);
+
+ hdr = (struct ieee80211_hdr *)skb->data;
+ if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) {
+ struct sta_info *sta = container_of(txq->sta, struct sta_info,
+ sta);
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
+ hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid);
+ if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags))
+ info->flags |= IEEE80211_TX_CTL_AMPDU;
+ else
+ info->flags &= ~IEEE80211_TX_CTL_AMPDU;
+ }
+
+out:
+ spin_unlock_bh(&txqi->queue.lock);
+
+ return skb;
+}
+EXPORT_SYMBOL(ieee80211_tx_dequeue);
+
static bool ieee80211_tx_frags(struct ieee80211_local *local,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
struct sk_buff_head *skbs,
bool txpending)
{
- struct ieee80211_tx_control control;
struct sk_buff *skb, *tmp;
unsigned long flags;
@@ -1275,10 +1371,9 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
info->control.vif = vif;
- control.sta = sta;
__skb_unlink(skb, skbs);
- drv_tx(local, &control, skb);
+ ieee80211_drv_tx(local, vif, sta, skb);
}
return true;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index d1742a7d9ea4..79412f16b61d 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -308,6 +308,11 @@ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue)
for (ac = 0; ac < n_acs; ac++) {
int ac_queue = sdata->vif.hw_queue[ac];
+ if (local->ops->wake_tx_queue &&
+ (atomic_read(&sdata->txqs_len[ac]) >
+ local->hw.txq_ac_max_pending))
+ continue;
+
if (ac_queue == queue ||
(sdata->vif.cab_queue == queue &&
local->queue_stop_reasons[ac_queue] == 0 &&
@@ -2189,46 +2194,6 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata)
mutex_unlock(&local->chanctx_mtx);
}
-static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id)
-{
- int i;
-
- for (i = 0; i < n_ids; i++)
- if (ids[i] == id)
- return true;
- return false;
-}
-
-size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
- const u8 *ids, int n_ids,
- const u8 *after_ric, int n_after_ric,
- size_t offset)
-{
- size_t pos = offset;
-
- while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) {
- if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) {
- pos += 2 + ies[pos + 1];
-
- while (pos < ielen &&
- !ieee80211_id_in_list(after_ric, n_after_ric,
- ies[pos]))
- pos += 2 + ies[pos + 1];
- } else {
- pos += 2 + ies[pos + 1];
- }
- }
-
- return pos;
-}
-
-size_t ieee80211_ie_split(const u8 *ies, size_t ielen,
- const u8 *ids, int n_ids, size_t offset)
-{
- return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset);
-}
-EXPORT_SYMBOL(ieee80211_ie_split);
-
size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset)
{
size_t pos = offset;
@@ -3352,3 +3317,20 @@ u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo)
return buf;
}
+
+void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct txq_info *txqi, int tid)
+{
+ skb_queue_head_init(&txqi->queue);
+ txqi->txq.vif = &sdata->vif;
+
+ if (sta) {
+ txqi->txq.sta = &sta->sta;
+ sta->sta.txq[tid] = &txqi->txq;
+ txqi->txq.ac = ieee802_1d_to_ac[tid & 7];
+ } else {
+ sdata->vif.txq = &txqi->txq;
+ txqi->txq.ac = IEEE80211_AC_BE;
+ }
+}
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 89f73a9e9874..a87d8b8ec730 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -70,7 +70,7 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
# nf_tables
nf_tables-objs += nf_tables_core.o nf_tables_api.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
obj-$(CONFIG_NF_TABLES) += nf_tables.o
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 758b002130d9..380ef5148ea1 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -19,6 +19,7 @@
#include <net/netlink.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
@@ -211,6 +212,22 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next,
#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
#include "ip_set_hash_gen.h"
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+static const char *get_physindev_name(const struct sk_buff *skb)
+{
+ struct net_device *dev = nf_bridge_get_physindev(skb);
+
+ return dev ? dev->name : NULL;
+}
+
+static const char *get_phyoutdev_name(const struct sk_buff *skb)
+{
+ struct net_device *dev = nf_bridge_get_physoutdev(skb);
+
+ return dev ? dev->name : NULL;
+}
+#endif
+
static int
hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -234,16 +251,15 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
e.ip &= ip_set_netmask(e.cidr);
#define IFACE(dir) (par->dir ? par->dir->name : NULL)
-#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL)
#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC)
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ e.iface = SRCDIR ? get_physindev_name(skb) :
+ get_phyoutdev_name(skb);
- if (!nf_bridge)
+ if (!e.iface)
return -EINVAL;
- e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
e.physdev = 1;
#else
e.iface = NULL;
@@ -476,11 +492,11 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
-
- if (!nf_bridge)
+ e.iface = SRCDIR ? get_physindev_name(skb) :
+ get_phyoutdev_name(skb);
+ if (!e.iface)
return -EINVAL;
- e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
+
e.physdev = 1;
#else
e.iface = NULL;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index bf02932b7188..19986ec5f21a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -536,8 +536,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_update_conntrack(skb, cp, 1);
if (!local) {
skb_forward_csum(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
- dst_output);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output_sk);
} else
ret = NF_ACCEPT;
return ret;
@@ -554,8 +554,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_notrack(skb);
if (!local) {
skb_forward_csum(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
- dst_output);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output_sk);
} else
ret = NF_ACCEPT;
return ret;
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index 2631876ac55b..a5aa5967b8e1 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -17,6 +17,7 @@
#include <net/route.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
@@ -163,10 +164,10 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
const struct net_device *physindev;
const struct net_device *physoutdev;
- physindev = skb->nf_bridge->physindev;
+ physindev = nf_bridge_get_physindev(skb);
if (physindev && in != physindev)
nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
- physoutdev = skb->nf_bridge->physoutdev;
+ physoutdev = nf_bridge_get_physoutdev(skb);
if (physoutdev && out != physoutdev)
nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
}
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index d3cd37edca18..2e88032cd5ad 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -10,6 +10,7 @@
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
#include <net/protocol.h>
@@ -54,14 +55,18 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
dev_put(state->in);
if (state->out)
dev_put(state->out);
+ if (state->sk)
+ sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
+ struct net_device *physdev;
- if (nf_bridge->physindev)
- dev_put(nf_bridge->physindev);
- if (nf_bridge->physoutdev)
- dev_put(nf_bridge->physoutdev);
+ physdev = nf_bridge_get_physindev(entry->skb);
+ if (physdev)
+ dev_put(physdev);
+ physdev = nf_bridge_get_physoutdev(entry->skb);
+ if (physdev)
+ dev_put(physdev);
}
#endif
/* Drop reference to owner of hook which queued us. */
@@ -81,15 +86,16 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
dev_hold(state->in);
if (state->out)
dev_hold(state->out);
+ if (state->sk)
+ sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
struct net_device *physdev;
- physdev = nf_bridge->physindev;
+ physdev = nf_bridge_get_physindev(entry->skb);
if (physdev)
dev_hold(physdev);
- physdev = nf_bridge->physoutdev;
+ physdev = nf_bridge_get_physoutdev(entry->skb);
if (physdev)
dev_hold(physdev);
}
@@ -198,7 +204,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
case NF_ACCEPT:
case NF_STOP:
local_bh_disable();
- entry->state.okfn(skb);
+ entry->state.okfn(entry->state.sk, skb);
local_bh_enable();
break;
case NF_QUEUE:
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5604c2df05d1..78af83bc9c8e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1545,6 +1545,23 @@ nla_put_failure:
return -1;
};
+int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
+ const struct nft_expr *expr)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, attr);
+ if (!nest)
+ goto nla_put_failure;
+ if (nf_tables_fill_expr_info(skb, expr) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
struct nft_expr_info {
const struct nft_expr_ops *ops;
struct nlattr *tb[NFT_EXPR_MAXATTR + 1];
@@ -1622,6 +1639,39 @@ static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
module_put(expr->ops->type->owner);
}
+struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
+ const struct nlattr *nla)
+{
+ struct nft_expr_info info;
+ struct nft_expr *expr;
+ int err;
+
+ err = nf_tables_expr_parse(ctx, nla, &info);
+ if (err < 0)
+ goto err1;
+
+ err = -ENOMEM;
+ expr = kzalloc(info.ops->size, GFP_KERNEL);
+ if (expr == NULL)
+ goto err2;
+
+ err = nf_tables_newexpr(ctx, &info, expr);
+ if (err < 0)
+ goto err2;
+
+ return expr;
+err2:
+ module_put(info.ops->type->owner);
+err1:
+ return ERR_PTR(err);
+}
+
+void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
+{
+ nf_tables_expr_destroy(ctx, expr);
+ kfree(expr);
+}
+
/*
* Rules
*/
@@ -1703,12 +1753,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
if (list == NULL)
goto nla_put_failure;
nft_rule_for_each_expr(expr, next, rule) {
- struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM);
- if (elem == NULL)
- goto nla_put_failure;
- if (nf_tables_fill_expr_info(skb, expr) < 0)
+ if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0)
goto nla_put_failure;
- nla_nest_end(skb, elem);
}
nla_nest_end(skb, list);
@@ -2159,7 +2205,7 @@ nft_select_set_ops(const struct nlattr * const nla[],
features = 0;
if (nla[NFTA_SET_FLAGS] != NULL) {
features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
- features &= NFT_SET_INTERVAL | NFT_SET_MAP;
+ features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT;
}
bops = NULL;
@@ -2216,6 +2262,8 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_POLICY] = { .type = NLA_U32 },
[NFTA_SET_DESC] = { .type = NLA_NESTED },
[NFTA_SET_ID] = { .type = NLA_U32 },
+ [NFTA_SET_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2366,6 +2414,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ if (set->timeout &&
+ nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout)))
+ goto nla_put_failure;
+ if (set->gc_int &&
+ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
+ goto nla_put_failure;
+
if (set->policy != NFT_SET_POL_PERFORMANCE) {
if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
goto nla_put_failure;
@@ -2578,7 +2633,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
char name[IFNAMSIZ];
unsigned int size;
bool create;
- u32 ktype, dtype, flags, policy;
+ u64 timeout;
+ u32 ktype, dtype, flags, policy, gc_int;
struct nft_set_desc desc;
int err;
@@ -2598,15 +2654,20 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
}
desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
- if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data))
+ if (desc.klen == 0 || desc.klen > NFT_DATA_VALUE_MAXLEN)
return -EINVAL;
flags = 0;
if (nla[NFTA_SET_FLAGS] != NULL) {
flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
- NFT_SET_INTERVAL | NFT_SET_MAP))
+ NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
+ NFT_SET_MAP | NFT_SET_EVAL))
return -EINVAL;
+ /* Only one of both operations is supported */
+ if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) ==
+ (NFT_SET_MAP | NFT_SET_EVAL))
+ return -EOPNOTSUPP;
}
dtype = 0;
@@ -2623,14 +2684,26 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
- if (desc.dlen == 0 ||
- desc.dlen > FIELD_SIZEOF(struct nft_data, data))
+ if (desc.dlen == 0 || desc.dlen > NFT_DATA_VALUE_MAXLEN)
return -EINVAL;
} else
- desc.dlen = sizeof(struct nft_data);
+ desc.dlen = sizeof(struct nft_verdict);
} else if (flags & NFT_SET_MAP)
return -EINVAL;
+ timeout = 0;
+ if (nla[NFTA_SET_TIMEOUT] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT]));
+ }
+ gc_int = 0;
+ if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+ }
+
policy = NFT_SET_POL_PERFORMANCE;
if (nla[NFTA_SET_POLICY] != NULL)
policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
@@ -2699,6 +2772,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
set->flags = flags;
set->size = desc.size;
set->policy = policy;
+ set->timeout = timeout;
+ set->gc_int = gc_int;
err = ops->init(set, &desc, nla);
if (err < 0)
@@ -2771,9 +2846,10 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
enum nft_registers dreg;
dreg = nft_type_to_reg(set->dtype);
- return nft_validate_data_load(ctx, dreg, nft_set_ext_data(ext),
- set->dtype == NFT_DATA_VERDICT ?
- NFT_DATA_VERDICT : NFT_DATA_VALUE);
+ return nft_validate_register_store(ctx, dreg, nft_set_ext_data(ext),
+ set->dtype == NFT_DATA_VERDICT ?
+ NFT_DATA_VERDICT : NFT_DATA_VALUE,
+ set->dlen);
}
int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
@@ -2785,12 +2861,13 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
return -EBUSY;
- if (set->flags & NFT_SET_MAP) {
+ if (binding->flags & NFT_SET_MAP) {
/* If the set is already bound to the same chain all
* jumps are already validated for that chain.
*/
list_for_each_entry(i, &set->bindings, list) {
- if (i->chain == binding->chain)
+ if (binding->flags & NFT_SET_MAP &&
+ i->chain == binding->chain)
goto bind;
}
@@ -2826,17 +2903,30 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_set_ext_type nft_set_ext_types[] = {
[NFT_SET_EXT_KEY] = {
- .len = sizeof(struct nft_data),
- .align = __alignof__(struct nft_data),
+ .align = __alignof__(u32),
},
[NFT_SET_EXT_DATA] = {
- .len = sizeof(struct nft_data),
- .align = __alignof__(struct nft_data),
+ .align = __alignof__(u32),
+ },
+ [NFT_SET_EXT_EXPR] = {
+ .align = __alignof__(struct nft_expr),
},
[NFT_SET_EXT_FLAGS] = {
.len = sizeof(u8),
.align = __alignof__(u8),
},
+ [NFT_SET_EXT_TIMEOUT] = {
+ .len = sizeof(u64),
+ .align = __alignof__(u64),
+ },
+ [NFT_SET_EXT_EXPIRATION] = {
+ .len = sizeof(unsigned long),
+ .align = __alignof__(unsigned long),
+ },
+ [NFT_SET_EXT_USERDATA] = {
+ .len = sizeof(struct nft_userdata),
+ .align = __alignof__(struct nft_userdata),
+ },
};
EXPORT_SYMBOL_GPL(nft_set_ext_types);
@@ -2848,6 +2938,9 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
[NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 },
+ [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -2904,11 +2997,43 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
set->dlen) < 0)
goto nla_put_failure;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR) &&
+ nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0)
+ goto nla_put_failure;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
nla_put_be32(skb, NFTA_SET_ELEM_FLAGS,
htonl(*nft_set_ext_flags(ext))))
goto nla_put_failure;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
+ nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
+ cpu_to_be64(*nft_set_ext_timeout(ext))))
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+ unsigned long expires, now = jiffies;
+
+ expires = *nft_set_ext_expiration(ext);
+ if (time_before(now, expires))
+ expires -= now;
+ else
+ expires = 0;
+
+ if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
+ cpu_to_be64(jiffies_to_msecs(expires))))
+ goto nla_put_failure;
+ }
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
+ struct nft_userdata *udata;
+
+ udata = nft_set_ext_userdata(ext);
+ if (nla_put(skb, NFTA_SET_ELEM_USERDATA,
+ udata->len + 1, udata->data))
+ goto nla_put_failure;
+ }
+
nla_nest_end(skb, nest);
return 0;
@@ -3128,11 +3253,10 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
return trans;
}
-static void *nft_set_elem_init(const struct nft_set *set,
- const struct nft_set_ext_tmpl *tmpl,
- const struct nft_data *key,
- const struct nft_data *data,
- gfp_t gfp)
+void *nft_set_elem_init(const struct nft_set *set,
+ const struct nft_set_ext_tmpl *tmpl,
+ const u32 *key, const u32 *data,
+ u64 timeout, gfp_t gfp)
{
struct nft_set_ext *ext;
void *elem;
@@ -3147,6 +3271,11 @@ static void *nft_set_elem_init(const struct nft_set *set,
memcpy(nft_set_ext_key(ext), key, set->klen);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
memcpy(nft_set_ext_data(ext), data, set->dlen);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
+ *nft_set_ext_expiration(ext) =
+ jiffies + msecs_to_jiffies(timeout);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
+ *nft_set_ext_timeout(ext) = timeout;
return elem;
}
@@ -3158,6 +3287,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem)
nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_uninit(nft_set_ext_data(ext), set->dtype);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+ nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
kfree(elem);
}
@@ -3172,15 +3303,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_ext *ext;
struct nft_set_elem elem;
struct nft_set_binding *binding;
+ struct nft_userdata *udata;
struct nft_data data;
enum nft_registers dreg;
struct nft_trans *trans;
+ u64 timeout;
u32 flags;
+ u8 ulen;
int err;
- if (set->size && set->nelems == set->size)
- return -ENFILE;
-
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
nft_set_elem_policy);
if (err < 0)
@@ -3215,17 +3346,33 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return -EINVAL;
}
- err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]);
+ timeout = 0;
+ if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT]));
+ } else if (set->flags & NFT_SET_TIMEOUT) {
+ timeout = set->timeout;
+ }
+
+ err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1,
+ nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
goto err1;
err = -EINVAL;
if (d1.type != NFT_DATA_VALUE || d1.len != set->klen)
goto err2;
- nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY);
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, d1.len);
+ if (timeout > 0) {
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+ if (timeout != set->timeout)
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+ }
if (nla[NFTA_SET_ELEM_DATA] != NULL) {
- err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]);
+ err = nft_data_init(ctx, &data, sizeof(data), &d2,
+ nla[NFTA_SET_ELEM_DATA]);
if (err < 0)
goto err2;
@@ -3241,29 +3388,51 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
.chain = (struct nft_chain *)binding->chain,
};
- err = nft_validate_data_load(&bind_ctx, dreg,
- &data, d2.type);
+ if (!(binding->flags & NFT_SET_MAP))
+ continue;
+
+ err = nft_validate_register_store(&bind_ctx, dreg,
+ &data,
+ d2.type, d2.len);
if (err < 0)
goto err3;
}
- nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA);
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len);
+ }
+
+ /* The full maximum length of userdata can exceed the maximum
+ * offset value (U8_MAX) for following extensions, therefor it
+ * must be the last extension added.
+ */
+ ulen = 0;
+ if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
+ ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
+ if (ulen > 0)
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
+ ulen);
}
err = -ENOMEM;
- elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL);
+ elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data,
+ timeout, GFP_KERNEL);
if (elem.priv == NULL)
goto err3;
ext = nft_set_elem_ext(set, elem.priv);
if (flags)
*nft_set_ext_flags(ext) = flags;
+ if (ulen > 0) {
+ udata = nft_set_ext_userdata(ext);
+ udata->len = ulen - 1;
+ nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
+ }
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL)
goto err4;
- ext->genmask = nft_genmask_cur(ctx->net);
+ ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
err = set->ops->insert(set, &elem);
if (err < 0)
goto err5;
@@ -3280,7 +3449,7 @@ err3:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_uninit(&data, d2.type);
err2:
- nft_data_uninit(&elem.key, d1.type);
+ nft_data_uninit(&elem.key.val, d1.type);
err1:
return err;
}
@@ -3316,11 +3485,15 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ if (set->size &&
+ !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
+ return -ENFILE;
+
err = nft_add_set_elem(&ctx, set, attr);
- if (err < 0)
+ if (err < 0) {
+ atomic_dec(&set->nelems);
break;
-
- set->nelems++;
+ }
}
return err;
}
@@ -3343,7 +3516,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (nla[NFTA_SET_ELEM_KEY] == NULL)
goto err1;
- err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]);
+ err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
+ nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
goto err1;
@@ -3370,7 +3544,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
err3:
kfree(trans);
err2:
- nft_data_uninit(&elem.key, desc.type);
+ nft_data_uninit(&elem.key.val, desc.type);
err1:
return err;
}
@@ -3402,11 +3576,36 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
break;
- set->nelems--;
+ set->ndeact++;
}
return err;
}
+void nft_set_gc_batch_release(struct rcu_head *rcu)
+{
+ struct nft_set_gc_batch *gcb;
+ unsigned int i;
+
+ gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
+ for (i = 0; i < gcb->head.cnt; i++)
+ nft_set_elem_destroy(gcb->head.set, gcb->elems[i]);
+ kfree(gcb);
+}
+EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
+
+struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
+ gfp_t gfp)
+{
+ struct nft_set_gc_batch *gcb;
+
+ gcb = kzalloc(sizeof(*gcb), gfp);
+ if (gcb == NULL)
+ return gcb;
+ gcb->head.set = set;
+ return gcb;
+}
+EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);
+
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq)
{
@@ -3710,6 +3909,8 @@ static int nf_tables_commit(struct sk_buff *skb)
&te->elem,
NFT_MSG_DELSETELEM, 0);
te->set->ops->remove(te->set, &te->elem);
+ atomic_dec(&te->set->nelems);
+ te->set->ndeact--;
break;
}
}
@@ -3813,16 +4014,16 @@ static int nf_tables_abort(struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
- nft_trans_elem_set(trans)->nelems--;
te = (struct nft_trans_elem *)trans->data;
te->set->ops->remove(te->set, &te->elem);
+ atomic_dec(&te->set->nelems);
break;
case NFT_MSG_DELSETELEM:
te = (struct nft_trans_elem *)trans->data;
- nft_trans_elem_set(trans)->nelems++;
te->set->ops->activate(te->set, &te->elem);
+ te->set->ndeact--;
nft_trans_destroy(trans);
break;
@@ -3906,10 +4107,10 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
return 0;
data = nft_set_ext_data(ext);
- switch (data->verdict) {
+ switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- return nf_tables_check_loops(ctx, data->chain);
+ return nf_tables_check_loops(ctx, data->verdict.chain);
default:
return 0;
}
@@ -3942,10 +4143,11 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
if (data == NULL)
continue;
- switch (data->verdict) {
+ switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- err = nf_tables_check_loops(ctx, data->chain);
+ err = nf_tables_check_loops(ctx,
+ data->verdict.chain);
if (err < 0)
return err;
default:
@@ -3960,7 +4162,8 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
continue;
list_for_each_entry(binding, &set->bindings, list) {
- if (binding->chain != chain)
+ if (!(binding->flags & NFT_SET_MAP) ||
+ binding->chain != chain)
continue;
iter.skip = 0;
@@ -3978,85 +4181,129 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
}
/**
- * nft_validate_input_register - validate an expressions' input register
+ * nft_parse_register - parse a register value from a netlink attribute
*
- * @reg: the register number
+ * @attr: netlink attribute
*
- * Validate that the input register is one of the general purpose
- * registers.
+ * Parse and translate a register value from a netlink attribute.
+ * Registers used to be 128 bit wide, these register numbers will be
+ * mapped to the corresponding 32 bit register numbers.
*/
-int nft_validate_input_register(enum nft_registers reg)
+unsigned int nft_parse_register(const struct nlattr *attr)
{
- if (reg <= NFT_REG_VERDICT)
- return -EINVAL;
- if (reg > NFT_REG_MAX)
- return -ERANGE;
- return 0;
+ unsigned int reg;
+
+ reg = ntohl(nla_get_be32(attr));
+ switch (reg) {
+ case NFT_REG_VERDICT...NFT_REG_4:
+ return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+ default:
+ return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+ }
}
-EXPORT_SYMBOL_GPL(nft_validate_input_register);
+EXPORT_SYMBOL_GPL(nft_parse_register);
/**
- * nft_validate_output_register - validate an expressions' output register
+ * nft_dump_register - dump a register value to a netlink attribute
+ *
+ * @skb: socket buffer
+ * @attr: attribute number
+ * @reg: register number
+ *
+ * Construct a netlink attribute containing the register number. For
+ * compatibility reasons, register numbers being a multiple of 4 are
+ * translated to the corresponding 128 bit register numbers.
+ */
+int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg)
+{
+ if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0)
+ reg = reg / (NFT_REG_SIZE / NFT_REG32_SIZE);
+ else
+ reg = reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00;
+
+ return nla_put_be32(skb, attr, htonl(reg));
+}
+EXPORT_SYMBOL_GPL(nft_dump_register);
+
+/**
+ * nft_validate_register_load - validate a load from a register
*
* @reg: the register number
+ * @len: the length of the data
*
- * Validate that the output register is one of the general purpose
- * registers or the verdict register.
+ * Validate that the input register is one of the general purpose
+ * registers and that the length of the load is within the bounds.
*/
-int nft_validate_output_register(enum nft_registers reg)
+int nft_validate_register_load(enum nft_registers reg, unsigned int len)
{
- if (reg < NFT_REG_VERDICT)
+ if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
+ return -EINVAL;
+ if (len == 0)
return -EINVAL;
- if (reg > NFT_REG_MAX)
+ if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data))
return -ERANGE;
+
return 0;
}
-EXPORT_SYMBOL_GPL(nft_validate_output_register);
+EXPORT_SYMBOL_GPL(nft_validate_register_load);
/**
- * nft_validate_data_load - validate an expressions' data load
+ * nft_validate_register_store - validate an expressions' register store
*
* @ctx: context of the expression performing the load
* @reg: the destination register number
* @data: the data to load
* @type: the data type
+ * @len: the length of the data
*
* Validate that a data load uses the appropriate data type for
- * the destination register. A value of NULL for the data means
- * that its runtime gathered data, which is always of type
- * NFT_DATA_VALUE.
+ * the destination register and the length is within the bounds.
+ * A value of NULL for the data means that its runtime gathered
+ * data.
*/
-int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
- const struct nft_data *data,
- enum nft_data_types type)
+int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type, unsigned int len)
{
int err;
switch (reg) {
case NFT_REG_VERDICT:
- if (data == NULL || type != NFT_DATA_VERDICT)
+ if (type != NFT_DATA_VERDICT)
return -EINVAL;
- if (data->verdict == NFT_GOTO || data->verdict == NFT_JUMP) {
- err = nf_tables_check_loops(ctx, data->chain);
+ if (data != NULL &&
+ (data->verdict.code == NFT_GOTO ||
+ data->verdict.code == NFT_JUMP)) {
+ err = nf_tables_check_loops(ctx, data->verdict.chain);
if (err < 0)
return err;
- if (ctx->chain->level + 1 > data->chain->level) {
+ if (ctx->chain->level + 1 >
+ data->verdict.chain->level) {
if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE)
return -EMLINK;
- data->chain->level = ctx->chain->level + 1;
+ data->verdict.chain->level = ctx->chain->level + 1;
}
}
return 0;
default:
+ if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
+ return -EINVAL;
+ if (len == 0)
+ return -EINVAL;
+ if (reg * NFT_REG32_SIZE + len >
+ FIELD_SIZEOF(struct nft_regs, data))
+ return -ERANGE;
+
if (data != NULL && type != NFT_DATA_VALUE)
return -EINVAL;
return 0;
}
}
-EXPORT_SYMBOL_GPL(nft_validate_data_load);
+EXPORT_SYMBOL_GPL(nft_validate_register_store);
static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
[NFTA_VERDICT_CODE] = { .type = NLA_U32 },
@@ -4077,11 +4324,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (!tb[NFTA_VERDICT_CODE])
return -EINVAL;
- data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+ data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
- switch (data->verdict) {
+ switch (data->verdict.code) {
default:
- switch (data->verdict & NF_VERDICT_MASK) {
+ switch (data->verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
@@ -4107,7 +4354,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
return -EOPNOTSUPP;
chain->use++;
- data->chain = chain;
+ data->verdict.chain = chain;
desc->len = sizeof(data);
break;
}
@@ -4118,10 +4365,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
static void nft_verdict_uninit(const struct nft_data *data)
{
- switch (data->verdict) {
+ switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- data->chain->use--;
+ data->verdict.chain->use--;
break;
}
}
@@ -4134,13 +4381,14 @@ static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
if (!nest)
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict)))
+ if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code)))
goto nla_put_failure;
- switch (data->verdict) {
+ switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name))
+ if (nla_put_string(skb, NFTA_VERDICT_CHAIN,
+ data->verdict.chain->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest);
@@ -4150,7 +4398,8 @@ nla_put_failure:
return -1;
}
-static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
+static int nft_value_init(const struct nft_ctx *ctx,
+ struct nft_data *data, unsigned int size,
struct nft_data_desc *desc, const struct nlattr *nla)
{
unsigned int len;
@@ -4158,10 +4407,10 @@ static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
len = nla_len(nla);
if (len == 0)
return -EINVAL;
- if (len > sizeof(data->data))
+ if (len > size)
return -EOVERFLOW;
- nla_memcpy(data->data, nla, sizeof(data->data));
+ nla_memcpy(data->data, nla, len);
desc->type = NFT_DATA_VALUE;
desc->len = len;
return 0;
@@ -4174,8 +4423,7 @@ static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
}
static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
- [NFTA_DATA_VALUE] = { .type = NLA_BINARY,
- .len = FIELD_SIZEOF(struct nft_data, data) },
+ [NFTA_DATA_VALUE] = { .type = NLA_BINARY },
[NFTA_DATA_VERDICT] = { .type = NLA_NESTED },
};
@@ -4184,6 +4432,7 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
*
* @ctx: context of the expression using the data
* @data: destination struct nft_data
+ * @size: maximum data length
* @desc: data description
* @nla: netlink attribute containing data
*
@@ -4193,7 +4442,8 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
* The caller can indicate that it only wants to accept data of type
* NFT_DATA_VALUE by passing NULL for the ctx argument.
*/
-int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+int nft_data_init(const struct nft_ctx *ctx,
+ struct nft_data *data, unsigned int size,
struct nft_data_desc *desc, const struct nlattr *nla)
{
struct nlattr *tb[NFTA_DATA_MAX + 1];
@@ -4204,7 +4454,8 @@ int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
return err;
if (tb[NFTA_DATA_VALUE])
- return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
+ return nft_value_init(ctx, data, size, desc,
+ tb[NFTA_DATA_VALUE]);
if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
return -EINVAL;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index ef4dfcbaf149..f153b07073af 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -65,23 +65,23 @@ static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
}
static void nft_cmp_fast_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1])
+ struct nft_regs *regs)
{
const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
u32 mask = nft_cmp_fast_mask(priv->len);
- if ((data[priv->sreg].data[0] & mask) == priv->data)
+ if ((regs->data[priv->sreg] & mask) == priv->data)
return;
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static bool nft_payload_fast_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_payload *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
unsigned char *ptr;
if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
@@ -94,12 +94,13 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
if (unlikely(ptr + priv->len >= skb_tail_pointer(skb)))
return false;
+ *dest = 0;
if (priv->len == 2)
- *(u16 *)dest->data = *(u16 *)ptr;
+ *(u16 *)dest = *(u16 *)ptr;
else if (priv->len == 4)
- *(u32 *)dest->data = *(u32 *)ptr;
+ *(u32 *)dest = *(u32 *)ptr;
else
- *(u8 *)dest->data = *(u8 *)ptr;
+ *(u8 *)dest = *(u8 *)ptr;
return true;
}
@@ -116,7 +117,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet);
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
- struct nft_data data[NFT_REG_MAX + 1];
+ struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
struct nft_stats *stats;
@@ -127,7 +128,7 @@ do_chain:
rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
next_rule:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ regs.verdict.code = NFT_CONTINUE;
list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
/* This rule is not active, skip. */
@@ -138,18 +139,18 @@ next_rule:
nft_rule_for_each_expr(expr, last, rule) {
if (expr->ops == &nft_cmp_fast_ops)
- nft_cmp_fast_eval(expr, data);
+ nft_cmp_fast_eval(expr, &regs);
else if (expr->ops != &nft_payload_fast_ops ||
- !nft_payload_fast_eval(expr, data, pkt))
- expr->ops->eval(expr, data, pkt);
+ !nft_payload_fast_eval(expr, &regs, pkt))
+ expr->ops->eval(expr, &regs, pkt);
- if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE)
+ if (regs.verdict.code != NFT_CONTINUE)
break;
}
- switch (data[NFT_REG_VERDICT].verdict) {
+ switch (regs.verdict.code) {
case NFT_BREAK:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ regs.verdict.code = NFT_CONTINUE;
continue;
case NFT_CONTINUE:
nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
@@ -158,15 +159,15 @@ next_rule:
break;
}
- switch (data[NFT_REG_VERDICT].verdict & NF_VERDICT_MASK) {
+ switch (regs.verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
- return data[NFT_REG_VERDICT].verdict;
+ return regs.verdict.code;
}
- switch (data[NFT_REG_VERDICT].verdict) {
+ switch (regs.verdict.code) {
case NFT_JUMP:
BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
jumpstack[stackptr].chain = chain;
@@ -177,7 +178,7 @@ next_rule:
case NFT_GOTO:
nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
- chain = data[NFT_REG_VERDICT].chain;
+ chain = regs.verdict.chain;
goto do_chain;
case NFT_CONTINUE:
rulenum++;
@@ -239,8 +240,14 @@ int __init nf_tables_core_module_init(void)
if (err < 0)
goto err6;
+ err = nft_dynset_module_init();
+ if (err < 0)
+ goto err7;
+
return 0;
+err7:
+ nft_payload_module_exit();
err6:
nft_byteorder_module_exit();
err5:
@@ -257,6 +264,7 @@ err1:
void nf_tables_core_module_exit(void)
{
+ nft_dynset_module_exit();
nft_payload_module_exit();
nft_byteorder_module_exit();
nft_bitwise_module_exit();
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 957b83a0223b..3ad91266c821 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -23,6 +23,7 @@
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_log.h>
@@ -62,7 +63,7 @@ struct nfulnl_instance {
struct timer_list timer;
struct net *net;
struct user_namespace *peer_user_ns; /* User namespace of the peer process */
- int peer_portid; /* PORTID of the peer process */
+ u32 peer_portid; /* PORTID of the peer process */
/* configurable parameters */
unsigned int flushtimeout; /* timeout until queue flush */
@@ -151,7 +152,7 @@ static void nfulnl_timer(unsigned long data);
static struct nfulnl_instance *
instance_create(struct net *net, u_int16_t group_num,
- int portid, struct user_namespace *user_ns)
+ u32 portid, struct user_namespace *user_ns)
{
struct nfulnl_instance *inst;
struct nfnl_log_net *log = nfnl_log_pernet(net);
@@ -448,14 +449,18 @@ __build_packet_message(struct nfnl_log_net *log,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ struct net_device *physindev;
+
/* Case 2: indev is bridge group, we need to look for
* physical device (when called from ipv4) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
- if (skb->nf_bridge && skb->nf_bridge->physindev &&
+
+ physindev = nf_bridge_get_physindev(skb);
+ if (physindev &&
nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
- htonl(skb->nf_bridge->physindev->ifindex)))
+ htonl(physindev->ifindex)))
goto nla_put_failure;
}
#endif
@@ -479,14 +484,18 @@ __build_packet_message(struct nfnl_log_net *log,
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ struct net_device *physoutdev;
+
/* Case 2: indev is a bridge group, we need to look
* for physical device (when called from ipv4) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
- if (skb->nf_bridge && skb->nf_bridge->physoutdev &&
+
+ physoutdev = nf_bridge_get_physoutdev(skb);
+ if (physoutdev &&
nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
- htonl(skb->nf_bridge->physoutdev->ifindex)))
+ htonl(physoutdev->ifindex)))
goto nla_put_failure;
}
#endif
@@ -998,7 +1007,7 @@ static int seq_show(struct seq_file *s, void *v)
{
const struct nfulnl_instance *inst = v;
- seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n",
+ seq_printf(s, "%5u %6u %5u %1u %5u %6u %2u\n",
inst->group_num,
inst->peer_portid, inst->qlen,
inst->copy_mode, inst->copy_range,
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 6e74655a8d4f..0b98c7420239 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -25,6 +25,7 @@
#include <linux/proc_fs.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_queue.h>
#include <linux/list.h>
@@ -54,7 +55,7 @@ struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */
struct rcu_head rcu;
- int peer_portid;
+ u32 peer_portid;
unsigned int queue_maxlen;
unsigned int copy_range;
unsigned int queue_dropped;
@@ -109,8 +110,7 @@ instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
}
static struct nfqnl_instance *
-instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
- int portid)
+instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
{
struct nfqnl_instance *inst;
unsigned int h;
@@ -396,14 +396,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ int physinif;
+
/* Case 2: indev is bridge group, we need to look for
* physical device (when called from ipv4) */
if (nla_put_be32(skb, NFQA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
- if (entskb->nf_bridge && entskb->nf_bridge->physindev &&
+
+ physinif = nf_bridge_get_physinif(entskb);
+ if (physinif &&
nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
- htonl(entskb->nf_bridge->physindev->ifindex)))
+ htonl(physinif)))
goto nla_put_failure;
}
#endif
@@ -426,14 +430,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ int physoutif;
+
/* Case 2: outdev is bridge group, we need to look for
* physical output device (when called from ipv4) */
if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
- if (entskb->nf_bridge && entskb->nf_bridge->physoutdev &&
+
+ physoutif = nf_bridge_get_physoutif(entskb);
+ if (physoutif &&
nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
- htonl(entskb->nf_bridge->physoutdev->ifindex)))
+ htonl(physoutif)))
goto nla_put_failure;
}
#endif
@@ -765,11 +773,12 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
return 1;
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- if (entry->skb->nf_bridge->physindev &&
- entry->skb->nf_bridge->physindev->ifindex == ifindex)
- return 1;
- if (entry->skb->nf_bridge->physoutdev &&
- entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
+ int physinif, physoutif;
+
+ physinif = nf_bridge_get_physinif(entry->skb);
+ physoutif = nf_bridge_get_physoutif(entry->skb);
+
+ if (physinif == ifindex || physoutif == ifindex)
return 1;
}
#endif
@@ -860,7 +869,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
};
static struct nfqnl_instance *
-verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid)
+verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, u32 nlportid)
{
struct nfqnl_instance *queue;
@@ -1242,7 +1251,7 @@ static int seq_show(struct seq_file *s, void *v)
{
const struct nfqnl_instance *inst = v;
- seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
+ seq_printf(s, "%5u %6u %5u %1u %5u %5u %5u %8u %2d\n",
inst->queue_num,
inst->peer_portid, inst->queue_total,
inst->copy_mode, inst->copy_range,
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 4fb6ee2c1106..d71cc18fa35d 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -26,18 +26,16 @@ struct nft_bitwise {
};
static void nft_bitwise_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
- const struct nft_data *src = &data[priv->sreg];
- struct nft_data *dst = &data[priv->dreg];
+ const u32 *src = &regs->data[priv->sreg];
+ u32 *dst = &regs->data[priv->dreg];
unsigned int i;
- for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) {
- dst->data[i] = (src->data[i] & priv->mask.data[i]) ^
- priv->xor.data[i];
- }
+ for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++)
+ dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i];
}
static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
@@ -63,28 +61,27 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
tb[NFTA_BITWISE_XOR] == NULL)
return -EINVAL;
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG]));
- err = nft_validate_input_register(priv->sreg);
+ priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
+ priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]);
+ err = nft_validate_register_load(priv->sreg, priv->len);
if (err < 0)
return err;
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG]));
- err = nft_validate_output_register(priv->dreg);
+ priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]);
+ err = nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, priv->len);
if (err < 0)
return err;
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
- if (err < 0)
- return err;
-
- priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
- err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]);
+ err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &d1,
+ tb[NFTA_BITWISE_MASK]);
if (err < 0)
return err;
if (d1.len != priv->len)
return -EINVAL;
- err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]);
+ err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2,
+ tb[NFTA_BITWISE_XOR]);
if (err < 0)
return err;
if (d2.len != priv->len)
@@ -97,9 +94,9 @@ static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_BITWISE_SREG, priv->sreg))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_BITWISE_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len)))
goto nla_put_failure;
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index c39ed8d29df1..fde5145f2e36 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -26,16 +26,17 @@ struct nft_byteorder {
};
static void nft_byteorder_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_byteorder *priv = nft_expr_priv(expr);
- struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg];
+ u32 *src = &regs->data[priv->sreg];
+ u32 *dst = &regs->data[priv->dreg];
union { u32 u32; u16 u16; } *s, *d;
unsigned int i;
- s = (void *)src->data;
- d = (void *)dst->data;
+ s = (void *)src;
+ d = (void *)dst;
switch (priv->size) {
case 4:
@@ -87,19 +88,6 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
tb[NFTA_BYTEORDER_OP] == NULL)
return -EINVAL;
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG]));
- err = nft_validate_input_register(priv->sreg);
- if (err < 0)
- return err;
-
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
- if (err < 0)
- return err;
-
priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP]));
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
@@ -109,10 +97,6 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
return -EINVAL;
}
- priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
- if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data))
- return -EINVAL;
-
priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
switch (priv->size) {
case 2:
@@ -122,16 +106,24 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
return -EINVAL;
}
- return 0;
+ priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]);
+ priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
+ err = nft_validate_register_load(priv->sreg, priv->len);
+ if (err < 0)
+ return err;
+
+ priv->dreg = nft_parse_register(tb[NFTA_BYTEORDER_DREG]);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, priv->len);
}
static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_byteorder *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_BYTEORDER_SREG, priv->sreg))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_BYTEORDER_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op)))
goto nla_put_failure;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index e2b3f51c81f1..e25b35d70e4d 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -25,13 +25,13 @@ struct nft_cmp_expr {
};
static void nft_cmp_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_cmp_expr *priv = nft_expr_priv(expr);
int d;
- d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len);
+ d = memcmp(&regs->data[priv->sreg], &priv->data, priv->len);
switch (priv->op) {
case NFT_CMP_EQ:
if (d != 0)
@@ -59,7 +59,7 @@ static void nft_cmp_eval(const struct nft_expr *expr,
return;
mismatch:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = {
@@ -75,12 +75,16 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
struct nft_data_desc desc;
int err;
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
- priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
-
- err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
+ tb[NFTA_CMP_DATA]);
BUG_ON(err < 0);
+ priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
+ err = nft_validate_register_load(priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+
+ priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
priv->len = desc.len;
return 0;
}
@@ -89,7 +93,7 @@ static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_cmp_expr *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op)))
goto nla_put_failure;
@@ -122,13 +126,18 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
u32 mask;
int err;
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
-
- err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &data, sizeof(data), &desc,
+ tb[NFTA_CMP_DATA]);
BUG_ON(err < 0);
- desc.len *= BITS_PER_BYTE;
+ priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
+ err = nft_validate_register_load(priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+
+ desc.len *= BITS_PER_BYTE;
mask = nft_cmp_fast_mask(desc.len);
+
priv->data = data.data[0] & mask;
priv->len = desc.len;
return 0;
@@ -139,7 +148,7 @@ static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
struct nft_data data;
- if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ)))
goto nla_put_failure;
@@ -167,7 +176,6 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
{
struct nft_data_desc desc;
struct nft_data data;
- enum nft_registers sreg;
enum nft_cmp_ops op;
int err;
@@ -176,11 +184,6 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
tb[NFTA_CMP_DATA] == NULL)
return ERR_PTR(-EINVAL);
- sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
- err = nft_validate_input_register(sreg);
- if (err < 0)
- return ERR_PTR(err);
-
op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
switch (op) {
case NFT_CMP_EQ:
@@ -194,7 +197,8 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
return ERR_PTR(-EINVAL);
}
- err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &data, sizeof(data), &desc,
+ tb[NFTA_CMP_DATA]);
if (err < 0)
return ERR_PTR(err);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 589b8487cd08..7f29cfc76349 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -55,7 +55,7 @@ nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info)
}
static void nft_target_eval_xt(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
void *info = nft_expr_priv(expr);
@@ -72,16 +72,16 @@ static void nft_target_eval_xt(const struct nft_expr *expr,
switch (ret) {
case XT_CONTINUE:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ regs->verdict.code = NFT_CONTINUE;
break;
default:
- data[NFT_REG_VERDICT].verdict = ret;
+ regs->verdict.code = ret;
break;
}
}
static void nft_target_eval_bridge(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
void *info = nft_expr_priv(expr);
@@ -98,19 +98,19 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
switch (ret) {
case EBT_ACCEPT:
- data[NFT_REG_VERDICT].verdict = NF_ACCEPT;
+ regs->verdict.code = NF_ACCEPT;
break;
case EBT_DROP:
- data[NFT_REG_VERDICT].verdict = NF_DROP;
+ regs->verdict.code = NF_DROP;
break;
case EBT_CONTINUE:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ regs->verdict.code = NFT_CONTINUE;
break;
case EBT_RETURN:
- data[NFT_REG_VERDICT].verdict = NFT_RETURN;
+ regs->verdict.code = NFT_RETURN;
break;
default:
- data[NFT_REG_VERDICT].verdict = ret;
+ regs->verdict.code = ret;
break;
}
}
@@ -304,7 +304,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
}
static void nft_match_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
void *info = nft_expr_priv(expr);
@@ -317,16 +317,16 @@ static void nft_match_eval(const struct nft_expr *expr,
ret = match->match(skb, (struct xt_action_param *)&pkt->xt);
if (pkt->xt.hotdrop) {
- data[NFT_REG_VERDICT].verdict = NF_DROP;
+ regs->verdict.code = NF_DROP;
return;
}
- switch(ret) {
- case true:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ switch (ret ? 1 : 0) {
+ case 1:
+ regs->verdict.code = NFT_CONTINUE;
break;
- case false:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ case 0:
+ regs->verdict.code = NFT_BREAK;
break;
}
}
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index c89ee486ce54..17591239229f 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -24,7 +24,7 @@ struct nft_counter {
};
static void nft_counter_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_counter *priv = nft_expr_priv(expr);
@@ -92,6 +92,7 @@ static struct nft_expr_type nft_counter_type __read_mostly = {
.ops = &nft_counter_ops,
.policy = nft_counter_policy,
.maxattr = NFTA_COUNTER_MAX,
+ .flags = NFT_EXPR_STATEFUL,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index cc5603016242..8cbca3432f90 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -31,11 +31,11 @@ struct nft_ct {
};
static void nft_ct_get_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_ct *priv = nft_expr_priv(expr);
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
const struct nf_conn_help *help;
@@ -54,8 +54,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
state = NF_CT_STATE_UNTRACKED_BIT;
else
state = NF_CT_STATE_BIT(ctinfo);
- dest->data[0] = state;
+ *dest = state;
return;
+ default:
+ break;
}
if (ct == NULL)
@@ -63,26 +65,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
switch (priv->key) {
case NFT_CT_DIRECTION:
- dest->data[0] = CTINFO2DIR(ctinfo);
+ *dest = CTINFO2DIR(ctinfo);
return;
case NFT_CT_STATUS:
- dest->data[0] = ct->status;
+ *dest = ct->status;
return;
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
- dest->data[0] = ct->mark;
+ *dest = ct->mark;
return;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
case NFT_CT_SECMARK:
- dest->data[0] = ct->secmark;
+ *dest = ct->secmark;
return;
#endif
case NFT_CT_EXPIRATION:
diff = (long)jiffies - (long)ct->timeout.expires;
if (diff < 0)
diff = 0;
- dest->data[0] = jiffies_to_msecs(diff);
+ *dest = jiffies_to_msecs(diff);
return;
case NFT_CT_HELPER:
if (ct->master == NULL)
@@ -93,9 +95,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
helper = rcu_dereference(help->helper);
if (helper == NULL)
goto err;
- if (strlen(helper->name) >= sizeof(dest->data))
- goto err;
- strncpy((char *)dest->data, helper->name, sizeof(dest->data));
+ strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
return;
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS: {
@@ -103,58 +103,60 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
unsigned int size;
if (!labels) {
- memset(dest->data, 0, sizeof(dest->data));
+ memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
return;
}
- BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > sizeof(dest->data));
size = labels->words * sizeof(long);
-
- memcpy(dest->data, labels->bits, size);
- if (size < sizeof(dest->data))
- memset(((char *) dest->data) + size, 0,
- sizeof(dest->data) - size);
+ memcpy(dest, labels->bits, size);
+ if (size < NF_CT_LABELS_MAX_SIZE)
+ memset(((char *) dest) + size, 0,
+ NF_CT_LABELS_MAX_SIZE - size);
return;
}
#endif
+ default:
+ break;
}
tuple = &ct->tuplehash[priv->dir].tuple;
switch (priv->key) {
case NFT_CT_L3PROTOCOL:
- dest->data[0] = nf_ct_l3num(ct);
+ *dest = nf_ct_l3num(ct);
return;
case NFT_CT_SRC:
- memcpy(dest->data, tuple->src.u3.all,
+ memcpy(dest, tuple->src.u3.all,
nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
return;
case NFT_CT_DST:
- memcpy(dest->data, tuple->dst.u3.all,
+ memcpy(dest, tuple->dst.u3.all,
nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
return;
case NFT_CT_PROTOCOL:
- dest->data[0] = nf_ct_protonum(ct);
+ *dest = nf_ct_protonum(ct);
return;
case NFT_CT_PROTO_SRC:
- dest->data[0] = (__force __u16)tuple->src.u.all;
+ *dest = (__force __u16)tuple->src.u.all;
return;
case NFT_CT_PROTO_DST:
- dest->data[0] = (__force __u16)tuple->dst.u.all;
+ *dest = (__force __u16)tuple->dst.u.all;
return;
+ default:
+ break;
}
return;
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static void nft_ct_set_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_ct *priv = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
#ifdef CONFIG_NF_CONNTRACK_MARK
- u32 value = data[priv->sreg].data[0];
+ u32 value = regs->data[priv->sreg];
#endif
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
@@ -172,6 +174,8 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
}
break;
#endif
+ default:
+ break;
}
}
@@ -220,12 +224,17 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
+ unsigned int len;
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
- case NFT_CT_STATE:
case NFT_CT_DIRECTION:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = sizeof(u8);
+ break;
+ case NFT_CT_STATE:
case NFT_CT_STATUS:
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
@@ -233,22 +242,54 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
#ifdef CONFIG_NF_CONNTRACK_SECMARK
case NFT_CT_SECMARK:
#endif
+ case NFT_CT_EXPIRATION:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = sizeof(u32);
+ break;
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = NF_CT_LABELS_MAX_SIZE;
+ break;
#endif
- case NFT_CT_EXPIRATION:
case NFT_CT_HELPER:
if (tb[NFTA_CT_DIRECTION] != NULL)
return -EINVAL;
+ len = NF_CT_HELPER_NAME_LEN;
break;
+
case NFT_CT_L3PROTOCOL:
case NFT_CT_PROTOCOL:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+ len = sizeof(u8);
+ break;
case NFT_CT_SRC:
case NFT_CT_DST:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+
+ switch (ctx->afi->family) {
+ case NFPROTO_IPV4:
+ len = FIELD_SIZEOF(struct nf_conntrack_tuple,
+ src.u3.ip);
+ break;
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ len = FIELD_SIZEOF(struct nf_conntrack_tuple,
+ src.u3.ip6);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+ break;
case NFT_CT_PROTO_SRC:
case NFT_CT_PROTO_DST:
if (tb[NFTA_CT_DIRECTION] == NULL)
return -EINVAL;
+ len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
break;
default:
return -EOPNOTSUPP;
@@ -265,12 +306,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
}
}
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
+ err = nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
if (err < 0)
return err;
@@ -286,20 +324,22 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
+ unsigned int len;
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
+ len = FIELD_SIZEOF(struct nf_conn, mark);
break;
#endif
default:
return -EOPNOTSUPP;
}
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
- err = nft_validate_input_register(priv->sreg);
+ priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
+ err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
return err;
@@ -320,7 +360,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_ct *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
goto nla_put_failure;
@@ -347,7 +387,7 @@ static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_ct *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_CT_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
goto nla_put_failure;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
new file mode 100644
index 000000000000..513a8ef60a59
--- /dev/null
+++ b/net/netfilter/nft_dynset.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2015 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_dynset {
+ struct nft_set *set;
+ struct nft_set_ext_tmpl tmpl;
+ enum nft_dynset_ops op:8;
+ enum nft_registers sreg_key:8;
+ enum nft_registers sreg_data:8;
+ u64 timeout;
+ struct nft_expr *expr;
+ struct nft_set_binding binding;
+};
+
+static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
+ struct nft_regs *regs)
+{
+ const struct nft_dynset *priv = nft_expr_priv(expr);
+ struct nft_set_ext *ext;
+ u64 timeout;
+ void *elem;
+
+ if (set->size && !atomic_add_unless(&set->nelems, 1, set->size))
+ return NULL;
+
+ timeout = priv->timeout ? : set->timeout;
+ elem = nft_set_elem_init(set, &priv->tmpl,
+ &regs->data[priv->sreg_key],
+ &regs->data[priv->sreg_data],
+ timeout, GFP_ATOMIC);
+ if (elem == NULL) {
+ if (set->size)
+ atomic_dec(&set->nelems);
+ return NULL;
+ }
+
+ ext = nft_set_elem_ext(set, elem);
+ if (priv->expr != NULL)
+ nft_expr_clone(nft_set_ext_expr(ext), priv->expr);
+
+ return elem;
+}
+
+static void nft_dynset_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_dynset *priv = nft_expr_priv(expr);
+ struct nft_set *set = priv->set;
+ const struct nft_set_ext *ext;
+ const struct nft_expr *sexpr;
+ u64 timeout;
+
+ if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
+ expr, regs, &ext)) {
+ sexpr = NULL;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+ sexpr = nft_set_ext_expr(ext);
+
+ if (priv->op == NFT_DYNSET_OP_UPDATE &&
+ nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+ timeout = priv->timeout ? : set->timeout;
+ *nft_set_ext_expiration(ext) = jiffies + timeout;
+ } else if (sexpr == NULL)
+ goto out;
+
+ if (sexpr != NULL)
+ sexpr->ops->eval(sexpr, regs, pkt);
+ return;
+ }
+out:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
+ [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING },
+ [NFTA_DYNSET_SET_ID] = { .type = NLA_U32 },
+ [NFTA_DYNSET_OP] = { .type = NLA_U32 },
+ [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 },
+ [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 },
+ [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED },
+};
+
+static int nft_dynset_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_dynset *priv = nft_expr_priv(expr);
+ struct nft_set *set;
+ u64 timeout;
+ int err;
+
+ if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
+ tb[NFTA_DYNSET_OP] == NULL ||
+ tb[NFTA_DYNSET_SREG_KEY] == NULL)
+ return -EINVAL;
+
+ set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]);
+ if (IS_ERR(set)) {
+ if (tb[NFTA_DYNSET_SET_ID])
+ set = nf_tables_set_lookup_byid(ctx->net,
+ tb[NFTA_DYNSET_SET_ID]);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ }
+
+ if (set->flags & NFT_SET_CONSTANT)
+ return -EBUSY;
+
+ priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
+ switch (priv->op) {
+ case NFT_DYNSET_OP_ADD:
+ break;
+ case NFT_DYNSET_OP_UPDATE:
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ timeout = 0;
+ if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT]));
+ }
+
+ priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
+ err = nft_validate_register_load(priv->sreg_key, set->klen);;
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_DYNSET_SREG_DATA] != NULL) {
+ if (!(set->flags & NFT_SET_MAP))
+ return -EINVAL;
+ if (set->dtype == NFT_DATA_VERDICT)
+ return -EOPNOTSUPP;
+
+ priv->sreg_data = nft_parse_register(tb[NFTA_DYNSET_SREG_DATA]);
+ err = nft_validate_register_load(priv->sreg_data, set->dlen);
+ if (err < 0)
+ return err;
+ } else if (set->flags & NFT_SET_MAP)
+ return -EINVAL;
+
+ if (tb[NFTA_DYNSET_EXPR] != NULL) {
+ if (!(set->flags & NFT_SET_EVAL))
+ return -EINVAL;
+ if (!(set->flags & NFT_SET_ANONYMOUS))
+ return -EOPNOTSUPP;
+
+ priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]);
+ if (IS_ERR(priv->expr))
+ return PTR_ERR(priv->expr);
+
+ err = -EOPNOTSUPP;
+ if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL))
+ goto err1;
+ } else if (set->flags & NFT_SET_EVAL)
+ return -EINVAL;
+
+ nft_set_ext_prepare(&priv->tmpl);
+ nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen);
+ if (set->flags & NFT_SET_MAP)
+ nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen);
+ if (priv->expr != NULL)
+ nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPR,
+ priv->expr->ops->size);
+ if (set->flags & NFT_SET_TIMEOUT) {
+ if (timeout || set->timeout)
+ nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
+ }
+
+ priv->timeout = timeout;
+
+ err = nf_tables_bind_set(ctx, set, &priv->binding);
+ if (err < 0)
+ goto err1;
+
+ priv->set = set;
+ return 0;
+
+err1:
+ if (priv->expr != NULL)
+ nft_expr_destroy(ctx, priv->expr);
+ return err;
+}
+
+static void nft_dynset_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_dynset *priv = nft_expr_priv(expr);
+
+ nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+ if (priv->expr != NULL)
+ nft_expr_destroy(ctx, priv->expr);
+}
+
+static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_dynset *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key))
+ goto nla_put_failure;
+ if (priv->set->flags & NFT_SET_MAP &&
+ nft_dump_register(skb, NFTA_DYNSET_SREG_DATA, priv->sreg_data))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_DYNSET_OP, htonl(priv->op)))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name))
+ goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout)))
+ goto nla_put_failure;
+ if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_dynset_type;
+static const struct nft_expr_ops nft_dynset_ops = {
+ .type = &nft_dynset_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)),
+ .eval = nft_dynset_eval,
+ .init = nft_dynset_init,
+ .destroy = nft_dynset_destroy,
+ .dump = nft_dynset_dump,
+};
+
+static struct nft_expr_type nft_dynset_type __read_mostly = {
+ .name = "dynset",
+ .ops = &nft_dynset_ops,
+ .policy = nft_dynset_policy,
+ .maxattr = NFTA_DYNSET_MAX,
+ .owner = THIS_MODULE,
+};
+
+int __init nft_dynset_module_init(void)
+{
+ return nft_register_expr(&nft_dynset_type);
+}
+
+void nft_dynset_module_exit(void)
+{
+ nft_unregister_expr(&nft_dynset_type);
+}
diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c
deleted file mode 100644
index b6eed4d5a096..000000000000
--- a/net/netfilter/nft_expr_template.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-
-struct nft_template {
-
-};
-
-static void nft_template_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
-{
- struct nft_template *priv = nft_expr_priv(expr);
-
-}
-
-static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = {
- [NFTA_TEMPLATE_ATTR] = { .type = NLA_U32 },
-};
-
-static int nft_template_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
-{
- struct nft_template *priv = nft_expr_priv(expr);
-
- return 0;
-}
-
-static void nft_template_destroy(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
-{
- struct nft_template *priv = nft_expr_priv(expr);
-
-}
-
-static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr)
-{
- const struct nft_template *priv = nft_expr_priv(expr);
-
- NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field);
- return 0;
-
-nla_put_failure:
- return -1;
-}
-
-static struct nft_expr_type nft_template_type;
-static const struct nft_expr_ops nft_template_ops = {
- .type = &nft_template_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_template)),
- .eval = nft_template_eval,
- .init = nft_template_init,
- .destroy = nft_template_destroy,
- .dump = nft_template_dump,
-};
-
-static struct nft_expr_type nft_template_type __read_mostly = {
- .name = "template",
- .ops = &nft_template_ops,
- .policy = nft_template_policy,
- .maxattr = NFTA_TEMPLATE_MAX,
- .owner = THIS_MODULE,
-};
-
-static int __init nft_template_module_init(void)
-{
- return nft_register_expr(&nft_template_type);
-}
-
-static void __exit nft_template_module_exit(void)
-{
- nft_unregister_expr(&nft_template_type);
-}
-
-module_init(nft_template_module_init);
-module_exit(nft_template_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_EXPR("template");
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 55c939f5371f..ba7aed13e174 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -26,11 +26,11 @@ struct nft_exthdr {
};
static void nft_exthdr_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_exthdr *priv = nft_expr_priv(expr);
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
unsigned int offset = 0;
int err;
@@ -39,11 +39,12 @@ static void nft_exthdr_eval(const struct nft_expr *expr,
goto err;
offset += priv->offset;
- if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
@@ -58,7 +59,6 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_exthdr *priv = nft_expr_priv(expr);
- int err;
if (tb[NFTA_EXTHDR_DREG] == NULL ||
tb[NFTA_EXTHDR_TYPE] == NULL ||
@@ -69,22 +69,17 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
- if (priv->len == 0 ||
- priv->len > FIELD_SIZEOF(struct nft_data, data))
- return -EINVAL;
+ priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
- return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, priv->len);
}
static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_exthdr *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
goto nla_put_failure;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index c7e1a9d7d46f..3f9d45d3d9b7 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -15,6 +15,7 @@
#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
+#include <linux/workqueue.h>
#include <linux/rhashtable.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
@@ -25,6 +26,7 @@
struct nft_hash {
struct rhashtable ht;
+ struct delayed_work gc_work;
};
struct nft_hash_elem {
@@ -34,7 +36,7 @@ struct nft_hash_elem {
struct nft_hash_cmp_arg {
const struct nft_set *set;
- const struct nft_data *key;
+ const u32 *key;
u8 genmask;
};
@@ -60,15 +62,16 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
const struct nft_hash_cmp_arg *x = arg->key;
const struct nft_hash_elem *he = ptr;
- if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+ if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+ return 1;
+ if (nft_set_elem_expired(&he->ext))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
return 1;
return 0;
}
-static bool nft_hash_lookup(const struct nft_set *set,
- const struct nft_data *key,
+static bool nft_hash_lookup(const struct nft_set *set, const u32 *key,
const struct nft_set_ext **ext)
{
struct nft_hash *priv = nft_set_priv(set);
@@ -86,6 +89,42 @@ static bool nft_hash_lookup(const struct nft_set *set,
return !!he;
}
+static bool nft_hash_update(struct nft_set *set, const u32 *key,
+ void *(*new)(struct nft_set *,
+ const struct nft_expr *,
+ struct nft_regs *regs),
+ const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = NFT_GENMASK_ANY,
+ .set = set,
+ .key = key,
+ };
+
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL)
+ goto out;
+
+ he = new(set, expr, regs);
+ if (he == NULL)
+ goto err1;
+ if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params))
+ goto err2;
+out:
+ *ext = &he->ext;
+ return true;
+
+err2:
+ nft_set_elem_destroy(set, he);
+err1:
+ return false;
+}
+
static int nft_hash_insert(const struct nft_set *set,
const struct nft_set_elem *elem)
{
@@ -94,7 +133,7 @@ static int nft_hash_insert(const struct nft_set *set,
struct nft_hash_cmp_arg arg = {
.genmask = nft_genmask_next(read_pnet(&set->pnet)),
.set = set,
- .key = &elem->key,
+ .key = elem->key.val.data,
};
return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
@@ -107,6 +146,7 @@ static void nft_hash_activate(const struct nft_set *set,
struct nft_hash_elem *he = elem->priv;
nft_set_elem_change_active(set, &he->ext);
+ nft_set_elem_clear_busy(&he->ext);
}
static void *nft_hash_deactivate(const struct nft_set *set,
@@ -117,12 +157,18 @@ static void *nft_hash_deactivate(const struct nft_set *set,
struct nft_hash_cmp_arg arg = {
.genmask = nft_genmask_next(read_pnet(&set->pnet)),
.set = set,
- .key = &elem->key,
+ .key = elem->key.val.data,
};
+ rcu_read_lock();
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
- if (he != NULL)
- nft_set_elem_change_active(set, &he->ext);
+ if (he != NULL) {
+ if (!nft_set_elem_mark_busy(&he->ext))
+ nft_set_elem_change_active(set, &he->ext);
+ else
+ he = NULL;
+ }
+ rcu_read_unlock();
return he;
}
@@ -170,6 +216,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
if (iter->count < iter->skip)
goto cont;
+ if (nft_set_elem_expired(&he->ext))
+ goto cont;
if (!nft_set_elem_active(&he->ext, genmask))
goto cont;
@@ -188,6 +236,55 @@ out:
rhashtable_walk_exit(&hti);
}
+static void nft_hash_gc(struct work_struct *work)
+{
+ struct nft_set *set;
+ struct nft_hash_elem *he;
+ struct nft_hash *priv;
+ struct nft_set_gc_batch *gcb = NULL;
+ struct rhashtable_iter hti;
+ int err;
+
+ priv = container_of(work, struct nft_hash, gc_work.work);
+ set = nft_set_container_of(priv);
+
+ err = rhashtable_walk_init(&priv->ht, &hti);
+ if (err)
+ goto schedule;
+
+ err = rhashtable_walk_start(&hti);
+ if (err && err != -EAGAIN)
+ goto out;
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ if (PTR_ERR(he) != -EAGAIN)
+ goto out;
+ continue;
+ }
+
+ if (!nft_set_elem_expired(&he->ext))
+ continue;
+ if (nft_set_elem_mark_busy(&he->ext))
+ continue;
+
+ gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+ if (gcb == NULL)
+ goto out;
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+ atomic_dec(&set->nelems);
+ nft_set_gc_batch_add(gcb, he);
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ nft_set_gc_batch_complete(gcb);
+schedule:
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+}
+
static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
{
return sizeof(struct nft_hash);
@@ -207,11 +304,20 @@ static int nft_hash_init(const struct nft_set *set,
{
struct nft_hash *priv = nft_set_priv(set);
struct rhashtable_params params = nft_hash_params;
+ int err;
params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
params.key_len = set->klen;
- return rhashtable_init(&priv->ht, &params);
+ err = rhashtable_init(&priv->ht, &params);
+ if (err < 0)
+ return err;
+
+ INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+ if (set->flags & NFT_SET_TIMEOUT)
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+ return 0;
}
static void nft_hash_elem_destroy(void *ptr, void *arg)
@@ -223,6 +329,7 @@ static void nft_hash_destroy(const struct nft_set *set)
{
struct nft_hash *priv = nft_set_priv(set);
+ cancel_delayed_work_sync(&priv->gc_work);
rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
(void *)set);
}
@@ -263,8 +370,9 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
.deactivate = nft_hash_deactivate,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
+ .update = nft_hash_update,
.walk = nft_hash_walk,
- .features = NFT_SET_MAP,
+ .features = NFT_SET_MAP | NFT_SET_TIMEOUT,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 810385eb7249..db3b746858e3 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -24,12 +24,12 @@ struct nft_immediate_expr {
};
static void nft_immediate_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
- nft_data_copy(&data[priv->dreg], &priv->data);
+ nft_data_copy(&regs->data[priv->dreg], &priv->data, priv->dlen);
}
static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
@@ -49,17 +49,15 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
tb[NFTA_IMMEDIATE_DATA] == NULL)
return -EINVAL;
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
+ err = nft_data_init(ctx, &priv->data, sizeof(priv->data), &desc,
+ tb[NFTA_IMMEDIATE_DATA]);
if (err < 0)
return err;
priv->dlen = desc.len;
- err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type);
+ priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
+ err = nft_validate_register_store(ctx, priv->dreg, &priv->data,
+ desc.type, desc.len);
if (err < 0)
goto err1;
@@ -81,7 +79,7 @@ static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_IMMEDIATE_DREG, priv->dreg))
goto nla_put_failure;
return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data,
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 85da5bd02f64..435c1ccd6c0e 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -27,7 +27,7 @@ struct nft_limit {
};
static void nft_limit_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_limit *priv = nft_expr_priv(expr);
@@ -45,7 +45,7 @@ static void nft_limit_eval(const struct nft_expr *expr,
}
spin_unlock_bh(&limit_lock);
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
@@ -98,6 +98,7 @@ static struct nft_expr_type nft_limit_type __read_mostly = {
.ops = &nft_limit_ops,
.policy = nft_limit_policy,
.maxattr = NFTA_LIMIT_MAX,
+ .flags = NFT_EXPR_STATEFUL,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index e18af9db2f04..a13d6a386d63 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -27,7 +27,7 @@ struct nft_log {
};
static void nft_log_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_log *priv = nft_expr_priv(expr);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index a5f30b8760ea..b3c31ef8015d 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -26,19 +26,20 @@ struct nft_lookup {
};
static void nft_lookup_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_lookup *priv = nft_expr_priv(expr);
const struct nft_set *set = priv->set;
const struct nft_set_ext *ext;
- if (set->ops->lookup(set, &data[priv->sreg], &ext)) {
+ if (set->ops->lookup(set, &regs->data[priv->sreg], &ext)) {
if (set->flags & NFT_SET_MAP)
- nft_data_copy(&data[priv->dreg], nft_set_ext_data(ext));
+ nft_data_copy(&regs->data[priv->dreg],
+ nft_set_ext_data(ext), set->dlen);
return;
}
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
@@ -70,8 +71,11 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return PTR_ERR(set);
}
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
- err = nft_validate_input_register(priv->sreg);
+ if (set->flags & NFT_SET_EVAL)
+ return -EOPNOTSUPP;
+
+ priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]);
+ err = nft_validate_register_load(priv->sreg, set->klen);
if (err < 0)
return err;
@@ -79,19 +83,16 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (!(set->flags & NFT_SET_MAP))
return -EINVAL;
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG]));
- err = nft_validate_output_register(priv->dreg);
+ priv->dreg = nft_parse_register(tb[NFTA_LOOKUP_DREG]);
+ err = nft_validate_register_store(ctx, priv->dreg, NULL,
+ set->dtype, set->dlen);
if (err < 0)
return err;
-
- if (priv->dreg == NFT_REG_VERDICT) {
- if (set->dtype != NFT_DATA_VERDICT)
- return -EINVAL;
- } else if (set->dtype == NFT_DATA_VERDICT)
- return -EINVAL;
} else if (set->flags & NFT_SET_MAP)
return -EINVAL;
+ priv->binding.flags = set->flags & NFT_SET_MAP;
+
err = nf_tables_bind_set(ctx, set, &priv->binding);
if (err < 0)
return err;
@@ -114,10 +115,10 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg))
goto nla_put_failure;
if (priv->set->flags & NFT_SET_MAP)
- if (nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg))
goto nla_put_failure;
return 0;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 5197874372ec..52561e1c31e2 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -25,62 +25,65 @@
#include <net/netfilter/nft_meta.h>
void nft_meta_get_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
const struct net_device *in = pkt->in, *out = pkt->out;
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
switch (priv->key) {
case NFT_META_LEN:
- dest->data[0] = skb->len;
+ *dest = skb->len;
break;
case NFT_META_PROTOCOL:
- *(__be16 *)dest->data = skb->protocol;
+ *dest = 0;
+ *(__be16 *)dest = skb->protocol;
break;
case NFT_META_NFPROTO:
- dest->data[0] = pkt->ops->pf;
+ *dest = pkt->ops->pf;
break;
case NFT_META_L4PROTO:
- dest->data[0] = pkt->tprot;
+ *dest = pkt->tprot;
break;
case NFT_META_PRIORITY:
- dest->data[0] = skb->priority;
+ *dest = skb->priority;
break;
case NFT_META_MARK:
- dest->data[0] = skb->mark;
+ *dest = skb->mark;
break;
case NFT_META_IIF:
if (in == NULL)
goto err;
- dest->data[0] = in->ifindex;
+ *dest = in->ifindex;
break;
case NFT_META_OIF:
if (out == NULL)
goto err;
- dest->data[0] = out->ifindex;
+ *dest = out->ifindex;
break;
case NFT_META_IIFNAME:
if (in == NULL)
goto err;
- strncpy((char *)dest->data, in->name, sizeof(dest->data));
+ strncpy((char *)dest, in->name, IFNAMSIZ);
break;
case NFT_META_OIFNAME:
if (out == NULL)
goto err;
- strncpy((char *)dest->data, out->name, sizeof(dest->data));
+ strncpy((char *)dest, out->name, IFNAMSIZ);
break;
case NFT_META_IIFTYPE:
if (in == NULL)
goto err;
- *(u16 *)dest->data = in->type;
+ *dest = 0;
+ *(u16 *)dest = in->type;
break;
case NFT_META_OIFTYPE:
if (out == NULL)
goto err;
- *(u16 *)dest->data = out->type;
+ *dest = 0;
+ *(u16 *)dest = out->type;
break;
case NFT_META_SKUID:
if (skb->sk == NULL || !sk_fullsock(skb->sk))
@@ -93,8 +96,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
goto err;
}
- dest->data[0] =
- from_kuid_munged(&init_user_ns,
+ *dest = from_kuid_munged(&init_user_ns,
skb->sk->sk_socket->file->f_cred->fsuid);
read_unlock_bh(&skb->sk->sk_callback_lock);
break;
@@ -108,8 +110,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
read_unlock_bh(&skb->sk->sk_callback_lock);
goto err;
}
- dest->data[0] =
- from_kgid_munged(&init_user_ns,
+ *dest = from_kgid_munged(&init_user_ns,
skb->sk->sk_socket->file->f_cred->fsgid);
read_unlock_bh(&skb->sk->sk_callback_lock);
break;
@@ -119,33 +120,33 @@ void nft_meta_get_eval(const struct nft_expr *expr,
if (dst == NULL)
goto err;
- dest->data[0] = dst->tclassid;
+ *dest = dst->tclassid;
break;
}
#endif
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
- dest->data[0] = skb->secmark;
+ *dest = skb->secmark;
break;
#endif
case NFT_META_PKTTYPE:
if (skb->pkt_type != PACKET_LOOPBACK) {
- dest->data[0] = skb->pkt_type;
+ *dest = skb->pkt_type;
break;
}
switch (pkt->ops->pf) {
case NFPROTO_IPV4:
if (ipv4_is_multicast(ip_hdr(skb)->daddr))
- dest->data[0] = PACKET_MULTICAST;
+ *dest = PACKET_MULTICAST;
else
- dest->data[0] = PACKET_BROADCAST;
+ *dest = PACKET_BROADCAST;
break;
case NFPROTO_IPV6:
if (ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
- dest->data[0] = PACKET_MULTICAST;
+ *dest = PACKET_MULTICAST;
else
- dest->data[0] = PACKET_BROADCAST;
+ *dest = PACKET_BROADCAST;
break;
default:
WARN_ON(1);
@@ -153,23 +154,22 @@ void nft_meta_get_eval(const struct nft_expr *expr,
}
break;
case NFT_META_CPU:
- dest->data[0] = raw_smp_processor_id();
+ *dest = raw_smp_processor_id();
break;
case NFT_META_IIFGROUP:
if (in == NULL)
goto err;
- dest->data[0] = in->group;
+ *dest = in->group;
break;
case NFT_META_OIFGROUP:
if (out == NULL)
goto err;
- dest->data[0] = out->group;
+ *dest = out->group;
break;
case NFT_META_CGROUP:
- if (skb->sk == NULL)
- break;
-
- dest->data[0] = skb->sk->sk_classid;
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ goto err;
+ *dest = skb->sk->sk_classid;
break;
default:
WARN_ON(1);
@@ -178,17 +178,17 @@ void nft_meta_get_eval(const struct nft_expr *expr,
return;
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
EXPORT_SYMBOL_GPL(nft_meta_get_eval);
void nft_meta_set_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *meta = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
- u32 value = data[meta->sreg].data[0];
+ u32 value = regs->data[meta->sreg];
switch (meta->key) {
case NFT_META_MARK:
@@ -218,22 +218,22 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
- int err;
+ unsigned int len;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
switch (priv->key) {
- case NFT_META_LEN:
case NFT_META_PROTOCOL:
+ case NFT_META_IIFTYPE:
+ case NFT_META_OIFTYPE:
+ len = sizeof(u16);
+ break;
case NFT_META_NFPROTO:
case NFT_META_L4PROTO:
+ case NFT_META_LEN:
case NFT_META_PRIORITY:
case NFT_META_MARK:
case NFT_META_IIF:
case NFT_META_OIF:
- case NFT_META_IIFNAME:
- case NFT_META_OIFNAME:
- case NFT_META_IIFTYPE:
- case NFT_META_OIFTYPE:
case NFT_META_SKUID:
case NFT_META_SKGID:
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -247,21 +247,19 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
case NFT_META_IIFGROUP:
case NFT_META_OIFGROUP:
case NFT_META_CGROUP:
+ len = sizeof(u32);
+ break;
+ case NFT_META_IIFNAME:
+ case NFT_META_OIFNAME:
+ len = IFNAMSIZ;
break;
default:
return -EOPNOTSUPP;
}
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
- if (err < 0)
- return err;
-
- return 0;
+ priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
@@ -270,20 +268,24 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
+ unsigned int len;
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
switch (priv->key) {
case NFT_META_MARK:
case NFT_META_PRIORITY:
+ len = sizeof(u32);
+ break;
case NFT_META_NFTRACE:
+ len = sizeof(u8);
break;
default:
return -EOPNOTSUPP;
}
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG]));
- err = nft_validate_input_register(priv->sreg);
+ priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
+ err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
return err;
@@ -298,7 +300,7 @@ int nft_meta_get_dump(struct sk_buff *skb,
if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_META_DREG, priv->dreg))
goto nla_put_failure;
return 0;
@@ -314,7 +316,7 @@ int nft_meta_set_dump(struct sk_buff *skb,
if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_META_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_META_SREG, priv->sreg))
goto nla_put_failure;
return 0;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index a0837c6c9283..ee2d71753746 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -37,7 +37,7 @@ struct nft_nat {
};
static void nft_nat_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_nat *priv = nft_expr_priv(expr);
@@ -49,33 +49,32 @@ static void nft_nat_eval(const struct nft_expr *expr,
if (priv->sreg_addr_min) {
if (priv->family == AF_INET) {
range.min_addr.ip = (__force __be32)
- data[priv->sreg_addr_min].data[0];
+ regs->data[priv->sreg_addr_min];
range.max_addr.ip = (__force __be32)
- data[priv->sreg_addr_max].data[0];
+ regs->data[priv->sreg_addr_max];
} else {
memcpy(range.min_addr.ip6,
- data[priv->sreg_addr_min].data,
- sizeof(struct nft_data));
+ &regs->data[priv->sreg_addr_min],
+ sizeof(range.min_addr.ip6));
memcpy(range.max_addr.ip6,
- data[priv->sreg_addr_max].data,
- sizeof(struct nft_data));
+ &regs->data[priv->sreg_addr_max],
+ sizeof(range.max_addr.ip6));
}
range.flags |= NF_NAT_RANGE_MAP_IPS;
}
if (priv->sreg_proto_min) {
range.min_proto.all =
- *(__be16 *)&data[priv->sreg_proto_min].data[0];
+ *(__be16 *)&regs->data[priv->sreg_proto_min];
range.max_proto.all =
- *(__be16 *)&data[priv->sreg_proto_max].data[0];
+ *(__be16 *)&regs->data[priv->sreg_proto_max];
range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
range.flags |= priv->flags;
- data[NFT_REG_VERDICT].verdict =
- nf_nat_setup_info(ct, &range, priv->type);
+ regs->verdict.code = nf_nat_setup_info(ct, &range, priv->type);
}
static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
@@ -119,6 +118,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_nat *priv = nft_expr_priv(expr);
+ unsigned int alen, plen;
u32 family;
int err;
@@ -146,25 +146,34 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
- if (family != AF_INET && family != AF_INET6)
- return -EAFNOSUPPORT;
if (family != ctx->afi->family)
return -EOPNOTSUPP;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip);
+ break;
+ case NFPROTO_IPV6:
+ alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip6);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
priv->sreg_addr_min =
- ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MIN]));
-
- err = nft_validate_input_register(priv->sreg_addr_min);
+ nft_parse_register(tb[NFTA_NAT_REG_ADDR_MIN]);
+ err = nft_validate_register_load(priv->sreg_addr_min, alen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_ADDR_MAX]) {
priv->sreg_addr_max =
- ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MAX]));
+ nft_parse_register(tb[NFTA_NAT_REG_ADDR_MAX]);
- err = nft_validate_input_register(priv->sreg_addr_max);
+ err = nft_validate_register_load(priv->sreg_addr_max,
+ alen);
if (err < 0)
return err;
} else {
@@ -172,19 +181,21 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
}
}
+ plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
if (tb[NFTA_NAT_REG_PROTO_MIN]) {
priv->sreg_proto_min =
- ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MIN]));
+ nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]);
- err = nft_validate_input_register(priv->sreg_proto_min);
+ err = nft_validate_register_load(priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_PROTO_MAX]) {
priv->sreg_proto_max =
- ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MAX]));
+ nft_parse_register(tb[NFTA_NAT_REG_PROTO_MAX]);
- err = nft_validate_input_register(priv->sreg_proto_max);
+ err = nft_validate_register_load(priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
@@ -220,18 +231,18 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
if (priv->sreg_addr_min) {
- if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MIN,
- htonl(priv->sreg_addr_min)) ||
- nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX,
- htonl(priv->sreg_addr_max)))
+ if (nft_dump_register(skb, NFTA_NAT_REG_ADDR_MIN,
+ priv->sreg_addr_min) ||
+ nft_dump_register(skb, NFTA_NAT_REG_ADDR_MAX,
+ priv->sreg_addr_max))
goto nla_put_failure;
}
if (priv->sreg_proto_min) {
- if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN,
- htonl(priv->sreg_proto_min)) ||
- nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX,
- htonl(priv->sreg_proto_max)))
+ if (nft_dump_register(skb, NFTA_NAT_REG_PROTO_MIN,
+ priv->sreg_proto_min) ||
+ nft_dump_register(skb, NFTA_NAT_REG_PROTO_MAX,
+ priv->sreg_proto_max))
goto nla_put_failure;
}
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 85daa84bfdfe..94fb3b27a2c5 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -18,12 +18,12 @@
#include <net/netfilter/nf_tables.h>
static void nft_payload_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_payload *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
int offset;
switch (priv->base) {
@@ -43,11 +43,12 @@ static void nft_payload_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (skb_copy_bits(skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
@@ -62,24 +63,21 @@ static int nft_payload_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_payload *priv = nft_expr_priv(expr);
- int err;
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ priv->dreg = nft_parse_register(tb[NFTA_PAYLOAD_DREG]);
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
- return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, priv->len);
}
static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_payload *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) ||
+ if (nft_dump_register(skb, NFTA_PAYLOAD_DREG, priv->dreg) ||
nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)))
@@ -131,9 +129,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
}
offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
- len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
- if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data))
- return ERR_PTR(-EINVAL);
+ len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
base != NFT_PAYLOAD_LL_HEADER)
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index e8ae2f6bf232..96805d21d618 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -28,7 +28,7 @@ struct nft_queue {
};
static void nft_queue_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_queue *priv = nft_expr_priv(expr);
@@ -51,7 +51,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
if (priv->flags & NFT_QUEUE_FLAG_BYPASS)
ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
- data[NFT_REG_VERDICT].verdict = ret;
+ regs->verdict.code = ret;
}
static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 42d0ca45fb9e..1c30f41cff5b 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -30,8 +30,7 @@ struct nft_rbtree_elem {
};
-static bool nft_rbtree_lookup(const struct nft_set *set,
- const struct nft_data *key,
+static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
const struct nft_set_ext **ext)
{
const struct nft_rbtree *priv = nft_set_priv(set);
@@ -45,7 +44,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set,
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = nft_data_cmp(nft_set_ext_key(&rbe->ext), key, set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
if (d < 0) {
parent = parent->rb_left;
interval = rbe;
@@ -91,9 +90,9 @@ static int __nft_rbtree_insert(const struct nft_set *set,
while (*p != NULL) {
parent = *p;
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = nft_data_cmp(nft_set_ext_key(&rbe->ext),
- nft_set_ext_key(&new->ext),
- set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext),
+ nft_set_ext_key(&new->ext),
+ set->klen);
if (d < 0)
p = &parent->rb_left;
else if (d > 0)
@@ -153,8 +152,8 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = nft_data_cmp(nft_set_ext_key(&rbe->ext), &elem->key,
- set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
+ set->klen);
if (d < 0)
parent = parent->rb_left;
else if (d > 0)
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index d7e9e93a4e90..03f7bf40ae75 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -44,25 +44,28 @@ int nft_redir_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_redir *priv = nft_expr_priv(expr);
+ unsigned int plen;
int err;
err = nft_redir_validate(ctx, expr, NULL);
if (err < 0)
return err;
+ plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
priv->sreg_proto_min =
- ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MIN]));
+ nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]);
- err = nft_validate_input_register(priv->sreg_proto_min);
+ err = nft_validate_register_load(priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_REDIR_REG_PROTO_MAX]) {
priv->sreg_proto_max =
- ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MAX]));
+ nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MAX]);
- err = nft_validate_input_register(priv->sreg_proto_max);
+ err = nft_validate_register_load(priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
@@ -85,11 +88,11 @@ int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
const struct nft_redir *priv = nft_expr_priv(expr);
if (priv->sreg_proto_min) {
- if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MIN,
- htonl(priv->sreg_proto_min)))
+ if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MIN,
+ priv->sreg_proto_min))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MAX,
- htonl(priv->sreg_proto_max)))
+ if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MAX,
+ priv->sreg_proto_max))
goto nla_put_failure;
}
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 92877114aff4..62cabee42fbe 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -18,7 +18,7 @@
#include <net/netfilter/ipv6/nf_reject.h>
static void nft_reject_inet_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_reject *priv = nft_expr_priv(expr);
@@ -58,7 +58,8 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
}
break;
}
- data[NFT_REG_VERDICT].verdict = NF_DROP;
+
+ regs->verdict.code = NF_DROP;
}
static int nft_reject_inet_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index c205b26a2bee..cca96cec1b68 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -272,7 +272,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
}
@@ -437,7 +437,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
tgi->lport ? tgi->lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
}
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 7198d660b4de..a1d126f29463 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -39,7 +39,7 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_cgroup_info *info = par->matchinfo;
- if (skb->sk == NULL)
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
return false;
return (info->id == skb->sk->sk_classid) ^ info->invert;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 50a52043650f..1caaccbc306c 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -25,16 +25,15 @@ MODULE_ALIAS("ip6t_physdev");
static bool
physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
- static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
const struct xt_physdev_info *info = par->matchinfo;
+ const struct net_device *physdev;
unsigned long ret;
const char *indev, *outdev;
- const struct nf_bridge_info *nf_bridge;
/* Not a bridged IP packet or no info available yet:
* LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
* the destination device will be a bridge. */
- if (!(nf_bridge = skb->nf_bridge)) {
+ if (!skb->nf_bridge) {
/* Return MATCH if the invert flags of the used options are on */
if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
!(info->invert & XT_PHYSDEV_OP_BRIDGED))
@@ -54,30 +53,41 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
return true;
}
+ physdev = nf_bridge_get_physoutdev(skb);
+ outdev = physdev ? physdev->name : NULL;
+
/* This only makes sense in the FORWARD and POSTROUTING chains */
if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
- (!!nf_bridge->physoutdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
+ (!!outdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
return false;
+ physdev = nf_bridge_get_physindev(skb);
+ indev = physdev ? physdev->name : NULL;
+
if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&
- (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
+ (!indev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
(info->bitmask & XT_PHYSDEV_OP_ISOUT &&
- (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
+ (!outdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
return false;
if (!(info->bitmask & XT_PHYSDEV_OP_IN))
goto match_outdev;
- indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
- ret = ifname_compare_aligned(indev, info->physindev, info->in_mask);
- if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
- return false;
+ if (indev) {
+ ret = ifname_compare_aligned(indev, info->physindev,
+ info->in_mask);
+
+ if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
+ return false;
+ }
match_outdev:
if (!(info->bitmask & XT_PHYSDEV_OP_OUT))
return true;
- outdev = nf_bridge->physoutdev ?
- nf_bridge->physoutdev->name : nulldevname;
+
+ if (!outdev)
+ return false;
+
ret = ifname_compare_aligned(outdev, info->physoutdev, info->out_mask);
return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 895534e87a47..e092cb046326 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -143,13 +143,10 @@ static bool xt_socket_sk_is_transparent(struct sock *sk)
}
}
-static bool
-socket_match(const struct sk_buff *skb, struct xt_action_param *par,
- const struct xt_socket_mtinfo1 *info)
+static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb,
+ const struct net_device *indev)
{
const struct iphdr *iph = ip_hdr(skb);
- struct udphdr _hdr, *hp = NULL;
- struct sock *sk = skb->sk;
__be32 uninitialized_var(daddr), uninitialized_var(saddr);
__be16 uninitialized_var(dport), uninitialized_var(sport);
u8 uninitialized_var(protocol);
@@ -159,10 +156,12 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
#endif
if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
+ struct udphdr _hdr, *hp;
+
hp = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_hdr), &_hdr);
if (hp == NULL)
- return false;
+ return NULL;
protocol = iph->protocol;
saddr = iph->saddr;
@@ -172,16 +171,17 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
} else if (iph->protocol == IPPROTO_ICMP) {
if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
- &sport, &dport))
- return false;
+ &sport, &dport))
+ return NULL;
} else {
- return false;
+ return NULL;
}
#ifdef XT_SOCKET_HAVE_CONNTRACK
- /* Do the lookup with the original socket address in case this is a
- * reply packet of an established SNAT-ted connection. */
-
+ /* Do the lookup with the original socket address in
+ * case this is a reply packet of an established
+ * SNAT-ted connection.
+ */
ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct) &&
((iph->protocol != IPPROTO_ICMP &&
@@ -197,10 +197,18 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
}
#endif
+ return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr,
+ sport, dport, indev);
+}
+
+static bool
+socket_match(const struct sk_buff *skb, struct xt_action_param *par,
+ const struct xt_socket_mtinfo1 *info)
+{
+ struct sock *sk = skb->sk;
+
if (!sk)
- sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
- saddr, daddr, sport, dport,
- par->in);
+ sk = xt_socket_lookup_slow_v4(skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -225,12 +233,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
sk = NULL;
}
- pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
- protocol, &saddr, ntohs(sport),
- &daddr, ntohs(dport),
- &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
- return (sk != NULL);
+ return sk != NULL;
}
static bool
@@ -327,28 +330,26 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol,
return NULL;
}
-static bool
-socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
+ const struct net_device *indev)
{
- struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb);
- struct udphdr _hdr, *hp = NULL;
- struct sock *sk = skb->sk;
- const struct in6_addr *daddr = NULL, *saddr = NULL;
__be16 uninitialized_var(dport), uninitialized_var(sport);
- int thoff = 0, uninitialized_var(tproto);
- const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ const struct in6_addr *daddr = NULL, *saddr = NULL;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ int thoff = 0, tproto;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
if (tproto < 0) {
pr_debug("unable to find transport header in IPv6 packet, dropping\n");
- return NF_DROP;
+ return NULL;
}
if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
- hp = skb_header_pointer(skb, thoff,
- sizeof(_hdr), &_hdr);
+ struct udphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
if (hp == NULL)
- return false;
+ return NULL;
saddr = &iph->saddr;
sport = hp->source;
@@ -356,17 +357,27 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
dport = hp->dest;
} else if (tproto == IPPROTO_ICMPV6) {
+ struct ipv6hdr ipv6_var;
+
if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
&sport, &dport, &ipv6_var))
- return false;
+ return NULL;
} else {
- return false;
+ return NULL;
}
+ return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr,
+ sport, dport, indev);
+}
+
+static bool
+socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ struct sock *sk = skb->sk;
+
if (!sk)
- sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
- saddr, daddr, sport, dport,
- par->in);
+ sk = xt_socket_lookup_slow_v6(skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -391,13 +402,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
sk = NULL;
}
- pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
- "(orig %pI6:%hu) sock %p\n",
- tproto, saddr, ntohs(sport),
- daddr, ntohs(dport),
- &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
- return (sk != NULL);
+ return sk != NULL;
}
#endif
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 9575a1892607..49ff32106080 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -907,6 +907,16 @@ static int nci_se_io(struct nfc_dev *nfc_dev, u32 se_idx,
return 0;
}
+static int nci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name)
+{
+ struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
+
+ if (!ndev->ops->fw_download)
+ return -ENOTSUPP;
+
+ return ndev->ops->fw_download(ndev, firmware_name);
+}
+
static struct nfc_ops nci_nfc_ops = {
.dev_up = nci_dev_up,
.dev_down = nci_dev_down,
@@ -922,6 +932,7 @@ static struct nfc_ops nci_nfc_ops = {
.disable_se = nci_disable_se,
.discover_se = nci_discover_se,
.se_io = nci_se_io,
+ .fw_download = nci_fw_download,
};
/* ---- Interface to NCI drivers ---- */
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 14a2d11581da..3763036710ae 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1584,7 +1584,7 @@ static const struct genl_ops nfc_genl_ops[] = {
struct urelease_work {
struct work_struct w;
- int portid;
+ u32 portid;
};
static void nfc_urelease_event_work(struct work_struct *work)
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 50ec42f170a0..2dacc7b5af23 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -100,7 +100,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
new_stats =
kmem_cache_alloc_node(flow_stats_cache,
- GFP_THISNODE |
+ GFP_NOWAIT |
+ __GFP_THISNODE |
+ __GFP_NOWARN |
__GFP_NOMEMALLOC,
node);
if (likely(new_stats)) {
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 3277a7520e31..6d39766e7828 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -222,7 +222,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct net *net = ovs_dp_get_net(vport->dp);
struct vxlan_port *vxlan_port = vxlan_vport(vport);
- __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+ struct sock *sk = vxlan_port->vs->sock->sk;
+ __be16 dst_port = inet_sk(sk)->inet_sport;
const struct ovs_key_ipv4_tunnel *tun_key;
struct vxlan_metadata md = {0};
struct rtable *rt;
@@ -255,7 +256,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
vxflags = vxlan_port->exts |
(tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0);
- err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst,
+ err = vxlan_xmit_skb(rt, sk, skb, fl.saddr, tun_key->ipv4_dst,
tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
src_port, dst_port,
&md, false, vxflags);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 378c3a6acf84..14f041398ca1 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -130,7 +130,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
rcu_read_lock();
conn = rds_conn_lookup(head, laddr, faddr, trans);
if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
- !is_outgoing) {
+ laddr == faddr && !is_outgoing) {
/* This is a looped back IB connection, and we're
* called by the code handling the incoming connect.
* We need a second connection object into which we
@@ -193,6 +193,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
}
atomic_set(&conn->c_state, RDS_CONN_DOWN);
+ conn->c_send_gen = 0;
conn->c_reconnect_jiffies = 0;
INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c3f2855c3d84..0d41155a2258 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -110,6 +110,7 @@ struct rds_connection {
void *c_transport_data;
atomic_t c_state;
+ unsigned long c_send_gen;
unsigned long c_flags;
unsigned long c_reconnect_jiffies;
struct delayed_work c_send_w;
diff --git a/net/rds/send.c b/net/rds/send.c
index 44672befc0ee..e9430f537f9c 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -140,8 +140,11 @@ int rds_send_xmit(struct rds_connection *conn)
struct scatterlist *sg;
int ret = 0;
LIST_HEAD(to_be_dropped);
+ int batch_count;
+ unsigned long send_gen = 0;
restart:
+ batch_count = 0;
/*
* sendmsg calls here after having queued its message on the send
@@ -157,6 +160,17 @@ restart:
}
/*
+ * we record the send generation after doing the xmit acquire.
+ * if someone else manages to jump in and do some work, we'll use
+ * this to avoid a goto restart farther down.
+ *
+ * The acquire_in_xmit() check above ensures that only one
+ * caller can increment c_send_gen at any time.
+ */
+ conn->c_send_gen++;
+ send_gen = conn->c_send_gen;
+
+ /*
* rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT,
* we do the opposite to avoid races.
*/
@@ -202,6 +216,16 @@ restart:
if (!rm) {
unsigned int len;
+ batch_count++;
+
+ /* we want to process as big a batch as we can, but
+ * we also want to avoid softlockups. If we've been
+ * through a lot of messages, lets back off and see
+ * if anyone else jumps in
+ */
+ if (batch_count >= 1024)
+ goto over_batch;
+
spin_lock_irqsave(&conn->c_lock, flags);
if (!list_empty(&conn->c_send_queue)) {
@@ -357,9 +381,9 @@ restart:
}
}
+over_batch:
if (conn->c_trans->xmit_complete)
conn->c_trans->xmit_complete(conn);
-
release_in_xmit(conn);
/* Nuke any messages we decided not to retransmit. */
@@ -380,10 +404,15 @@ restart:
* If the transport cannot continue (i.e ret != 0), then it must
* call us when more room is available, such as from the tx
* completion handler.
+ *
+ * We have an extra generation check here so that if someone manages
+ * to jump in after our release_in_xmit, we'll see that they have done
+ * some work and we will skip our goto
*/
if (ret == 0) {
smp_mb();
- if (!list_empty(&conn->c_send_queue)) {
+ if (!list_empty(&conn->c_send_queue) &&
+ send_gen == conn->c_send_gen) {
rds_stats_inc(s_send_lock_queue_raced);
goto restart;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 4d2cede17468..dc6a2d324bd8 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -38,6 +38,9 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
struct tcf_bpf *prog = act->priv;
int action, filter_res;
+ if (unlikely(!skb_mac_header_was_set(skb)))
+ return TC_ACT_UNSPEC;
+
spin_lock(&prog->tcf_lock);
prog->tcf_tm.lastuse = jiffies;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5953517ec059..3f63ceac8e01 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -157,7 +157,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
if (!(at & AT_EGRESS)) {
if (m->tcfm_ok_push)
- skb_push(skb2, skb2->dev->hard_header_len);
+ skb_push(skb2, skb->mac_len);
}
/* mirror is always swallowed */
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5c4171c5d2bd..91bd9c19471d 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -66,6 +66,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_bpf_prog *prog;
int ret = -1;
+ if (unlikely(!skb_mac_header_was_set(skb)))
+ return -1;
+
/* Needed here for accessing maps. */
rcu_read_lock();
list_for_each_entry_rcu(prog, &head->plist, link) {
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index eb5b8445fef9..4cdbfb85686a 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -88,11 +88,19 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* ------------------------------------------------------------- */
+static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ net_inc_ingress_queue();
+
+ return 0;
+}
+
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_qdisc_data *p = qdisc_priv(sch);
tcf_destroy_chain(&p->filter_list);
+ net_dec_ingress_queue();
}
static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -124,6 +132,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.id = "ingress",
.priv_size = sizeof(struct ingress_qdisc_data),
.enqueue = ingress_enqueue,
+ .init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
.owner = THIS_MODULE,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 179f1c8c0d8b..956ead2cab9a 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -560,8 +560,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
tfifo_dequeue:
skb = __skb_dequeue(&sch->q);
if (skb) {
-deliver:
qdisc_qstats_backlog_dec(sch, skb);
+deliver:
qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
return skb;
@@ -578,6 +578,7 @@ deliver:
rb_erase(p, &q->t_root);
sch->q.qlen--;
+ qdisc_qstats_backlog_dec(sch, skb);
skb->next = NULL;
skb->prev = NULL;
skb->tstamp = netem_skb_cb(skb)->tstamp_save;
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index fb78117b896c..9068e72aa73c 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -1,9 +1,11 @@
config SUNRPC
tristate
+ depends on MULTIUSER
config SUNRPC_GSS
tristate
select OID_REGISTRY
+ depends on MULTIUSER
config SUNRPC_BACKCHANNEL
bool
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 5199bb1a017e..2928afffbb81 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1072,10 +1072,12 @@ void qword_add(char **bpp, int *lp, char *str)
if (len < 0) return;
- ret = string_escape_str(str, &bp, len, ESCAPE_OCTAL, "\\ \n\t");
- if (ret < 0 || ret == len)
+ ret = string_escape_str(str, bp, len, ESCAPE_OCTAL, "\\ \n\t");
+ if (ret >= len) {
+ bp += len;
len = -1;
- else {
+ } else {
+ bp += ret;
len -= ret;
*bp++ = ' ';
len--;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 124676c13780..e28909fddd30 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1136,7 +1136,7 @@ rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
int i, rc;
i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
- dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i);
+ dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);
while (i--) {
r = kzalloc(sizeof(*r), GFP_KERNEL);
@@ -1169,7 +1169,7 @@ rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
int i, rc;
i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
- dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i);
+ dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);
while (i--) {
r = kzalloc(sizeof(*r), GFP_KERNEL);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ef3d7aa2854a..66deebc66aa1 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -176,7 +176,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
goto tx_error;
}
ttl = ip4_dst_hoplimit(&rt->dst);
- err = udp_tunnel_xmit_skb(rt, clone, src->ipv4.s_addr,
+ err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, clone,
+ src->ipv4.s_addr,
dst->ipv4.s_addr, 0, ttl, 0,
src->udp_port, dst->udp_port,
false, true);
@@ -197,7 +198,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
if (err)
goto tx_error;
ttl = ip6_dst_hoplimit(ndst);
- err = udp_tunnel6_xmit_skb(ndst, clone, ndst->dev, &src->ipv6,
+ err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, clone,
+ ndst->dev, &src->ipv6,
&dst->ipv6, 0, ttl, src->udp_port,
dst->udp_port, false);
#endif
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index b13dfb4ff001..4f5543dd2524 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -175,7 +175,7 @@ config CFG80211_INTERNAL_REGDB
Most distributions have a CRDA package. So if unsure, say N.
config CFG80211_WEXT
- bool "cfg80211 wireless extensions compatibility"
+ bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT
depends on CFG80211
select WEXT_CORE
default y if CFG80211_WEXT_EXPORT
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6dd1ab3b10ea..dd78445c7d50 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5664,7 +5664,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
}
}
- r = set_regdom(rd);
+ r = set_regdom(rd, REGD_SOURCE_CRDA);
/* set_regdom took ownership */
rd = NULL;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index be5f81caa488..0e347f888fe9 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -135,6 +135,11 @@ static spinlock_t reg_indoor_lock;
/* Used to track the userspace process controlling the indoor setting */
static u32 reg_is_indoor_portid;
+/* Max number of consecutive attempts to communicate with CRDA */
+#define REG_MAX_CRDA_TIMEOUTS 10
+
+static u32 reg_crda_timeouts;
+
static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
{
return rtnl_dereference(cfg80211_regdomain);
@@ -485,7 +490,7 @@ static void reg_regdb_search(struct work_struct *work)
mutex_unlock(&reg_regdb_search_mutex);
if (!IS_ERR_OR_NULL(regdom))
- set_regdom(regdom);
+ set_regdom(regdom, REGD_SOURCE_INTERNAL_DB);
rtnl_unlock();
}
@@ -535,15 +540,20 @@ static int call_crda(const char *alpha2)
snprintf(country, sizeof(country), "COUNTRY=%c%c",
alpha2[0], alpha2[1]);
+ /* query internal regulatory database (if it exists) */
+ reg_regdb_query(alpha2);
+
+ if (reg_crda_timeouts > REG_MAX_CRDA_TIMEOUTS) {
+ pr_info("Exceeded CRDA call max attempts. Not calling CRDA\n");
+ return -EINVAL;
+ }
+
if (!is_world_regdom((char *) alpha2))
pr_info("Calling CRDA for country: %c%c\n",
alpha2[0], alpha2[1]);
else
pr_info("Calling CRDA to update world regulatory domain\n");
- /* query internal regulatory database (if it exists) */
- reg_regdb_query(alpha2);
-
return kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, env);
}
@@ -2293,6 +2303,9 @@ int regulatory_hint_user(const char *alpha2,
request->initiator = NL80211_REGDOM_SET_BY_USER;
request->user_reg_hint_type = user_reg_hint_type;
+ /* Allow calling CRDA again */
+ reg_crda_timeouts = 0;
+
queue_regulatory_request(request);
return 0;
@@ -2362,6 +2375,9 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
request->alpha2[1] = alpha2[1];
request->initiator = NL80211_REGDOM_SET_BY_DRIVER;
+ /* Allow calling CRDA again */
+ reg_crda_timeouts = 0;
+
queue_regulatory_request(request);
return 0;
@@ -2415,6 +2431,9 @@ void regulatory_hint_country_ie(struct wiphy *wiphy, enum ieee80211_band band,
request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE;
request->country_ie_env = env;
+ /* Allow calling CRDA again */
+ reg_crda_timeouts = 0;
+
queue_regulatory_request(request);
request = NULL;
out:
@@ -2893,7 +2912,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd,
* multiple drivers can be ironed out later. Caller must've already
* kmalloc'd the rd structure.
*/
-int set_regdom(const struct ieee80211_regdomain *rd)
+int set_regdom(const struct ieee80211_regdomain *rd,
+ enum ieee80211_regd_source regd_src)
{
struct regulatory_request *lr;
bool user_reset = false;
@@ -2904,6 +2924,9 @@ int set_regdom(const struct ieee80211_regdomain *rd)
return -EINVAL;
}
+ if (regd_src == REGD_SOURCE_CRDA)
+ reg_crda_timeouts = 0;
+
lr = get_last_request();
/* Note that this doesn't update the wiphys, this is done below */
@@ -3063,6 +3086,7 @@ static void reg_timeout_work(struct work_struct *work)
{
REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n");
rtnl_lock();
+ reg_crda_timeouts++;
restore_regulatory_settings(true);
rtnl_unlock();
}
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index a2c4e16459da..9f495d76eca0 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -16,6 +16,11 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+enum ieee80211_regd_source {
+ REGD_SOURCE_INTERNAL_DB,
+ REGD_SOURCE_CRDA,
+};
+
extern const struct ieee80211_regdomain __rcu *cfg80211_regdomain;
bool reg_is_valid_request(const char *alpha2);
@@ -46,7 +51,9 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy);
int __init regulatory_init(void);
void regulatory_exit(void);
-int set_regdom(const struct ieee80211_regdomain *rd);
+int set_regdom(const struct ieee80211_regdomain *rd,
+ enum ieee80211_regd_source regd_src);
+
unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd,
const struct ieee80211_reg_rule *rule);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index ea1da6621ff0..d11454f87bac 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -42,7 +42,7 @@ struct cfg80211_conn {
CFG80211_CONN_CONNECTED,
} state;
u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
- u8 *ie;
+ const u8 *ie;
size_t ie_len;
bool auto_auth, prev_bssid_valid;
};
@@ -423,6 +423,62 @@ void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev)
schedule_work(&rdev->conn_work);
}
+static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev,
+ const u8 *ies, size_t ies_len,
+ const u8 **out_ies, size_t *out_ies_len)
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ u8 *buf;
+ size_t offs;
+
+ if (!rdev->wiphy.extended_capabilities_len ||
+ (ies && cfg80211_find_ie(WLAN_EID_EXT_CAPABILITY, ies, ies_len))) {
+ *out_ies = kmemdup(ies, ies_len, GFP_KERNEL);
+ if (!*out_ies)
+ return -ENOMEM;
+ *out_ies_len = ies_len;
+ return 0;
+ }
+
+ buf = kmalloc(ies_len + rdev->wiphy.extended_capabilities_len + 2,
+ GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ if (ies_len) {
+ static const u8 before_extcapa[] = {
+ /* not listing IEs expected to be created by driver */
+ WLAN_EID_RSN,
+ WLAN_EID_QOS_CAPA,
+ WLAN_EID_RRM_ENABLED_CAPABILITIES,
+ WLAN_EID_MOBILITY_DOMAIN,
+ WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
+ WLAN_EID_BSS_COEX_2040,
+ };
+
+ offs = ieee80211_ie_split(ies, ies_len, before_extcapa,
+ ARRAY_SIZE(before_extcapa), 0);
+ memcpy(buf, ies, offs);
+ /* leave a whole for extended capabilities IE */
+ memcpy(buf + offs + rdev->wiphy.extended_capabilities_len + 2,
+ ies + offs, ies_len - offs);
+ } else {
+ offs = 0;
+ }
+
+ /* place extended capabilities IE (with only driver capabilities) */
+ buf[offs] = WLAN_EID_EXT_CAPABILITY;
+ buf[offs + 1] = rdev->wiphy.extended_capabilities_len;
+ memcpy(buf + offs + 2,
+ rdev->wiphy.extended_capabilities,
+ rdev->wiphy.extended_capabilities_len);
+
+ *out_ies = buf;
+ *out_ies_len = ies_len + rdev->wiphy.extended_capabilities_len + 2;
+
+ return 0;
+}
+
static int cfg80211_sme_connect(struct wireless_dev *wdev,
struct cfg80211_connect_params *connect,
const u8 *prev_bssid)
@@ -453,16 +509,14 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN);
}
- if (connect->ie) {
- wdev->conn->ie = kmemdup(connect->ie, connect->ie_len,
- GFP_KERNEL);
- wdev->conn->params.ie = wdev->conn->ie;
- if (!wdev->conn->ie) {
- kfree(wdev->conn);
- wdev->conn = NULL;
- return -ENOMEM;
- }
+ if (cfg80211_sme_get_conn_ies(wdev, connect->ie, connect->ie_len,
+ &wdev->conn->ie,
+ &wdev->conn->params.ie_len)) {
+ kfree(wdev->conn);
+ wdev->conn = NULL;
+ return -ENOMEM;
}
+ wdev->conn->params.ie = wdev->conn->ie;
if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) {
wdev->conn->auto_auth = true;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index f218b151530a..70051ab52f4f 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1290,6 +1290,47 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
}
EXPORT_SYMBOL(cfg80211_get_p2p_attr);
+static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id)
+{
+ int i;
+
+ for (i = 0; i < n_ids; i++)
+ if (ids[i] == id)
+ return true;
+ return false;
+}
+
+size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
+ const u8 *ids, int n_ids,
+ const u8 *after_ric, int n_after_ric,
+ size_t offset)
+{
+ size_t pos = offset;
+
+ while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) {
+ if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) {
+ pos += 2 + ies[pos + 1];
+
+ while (pos < ielen &&
+ !ieee80211_id_in_list(after_ric, n_after_ric,
+ ies[pos]))
+ pos += 2 + ies[pos + 1];
+ } else {
+ pos += 2 + ies[pos + 1];
+ }
+ }
+
+ return pos;
+}
+EXPORT_SYMBOL(ieee80211_ie_split_ric);
+
+size_t ieee80211_ie_split(const u8 *ies, size_t ielen,
+ const u8 *ids, int n_ids, size_t offset)
+{
+ return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset);
+}
+EXPORT_SYMBOL(ieee80211_ie_split);
+
bool ieee80211_operating_class_to_band(u8 operating_class,
enum ieee80211_band *band)
{
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 85d1d4764612..526c4feb3b50 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -238,11 +238,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
skb->sp->xvec[skb->sp->len++] = x;
- if (xfrm_tunnel_check(skb, x, family)) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
- goto drop;
- }
-
spin_lock(&x->lock);
if (unlikely(x->km.state == XFRM_STATE_ACQ)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
@@ -271,6 +266,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
spin_unlock(&x->lock);
+ if (xfrm_tunnel_check(skb, x, family)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
+ goto drop;
+ }
+
seq_hi = htonl(xfrm_replay_seqhi(x, seq));
XFRM_SKB_CB(skb)->seq.input.low = seq;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 7c532856b398..fbcedbe33190 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -19,7 +19,7 @@
#include <net/dst.h>
#include <net/xfrm.h>
-static int xfrm_output2(struct sk_buff *skb);
+static int xfrm_output2(struct sock *sk, struct sk_buff *skb);
static int xfrm_skb_check_space(struct sk_buff *skb)
{
@@ -130,7 +130,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err)
return dst_output(skb);
err = nf_hook(skb_dst(skb)->ops->family,
- NF_INET_POST_ROUTING, skb,
+ NF_INET_POST_ROUTING, skb->sk, skb,
NULL, skb_dst(skb)->dev, xfrm_output2);
if (unlikely(err != 1))
goto out;
@@ -144,12 +144,12 @@ out:
}
EXPORT_SYMBOL_GPL(xfrm_output_resume);
-static int xfrm_output2(struct sk_buff *skb)
+static int xfrm_output2(struct sock *sk, struct sk_buff *skb)
{
return xfrm_output_resume(skb, 1);
}
-static int xfrm_output_gso(struct sk_buff *skb)
+static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb)
{
struct sk_buff *segs;
@@ -165,7 +165,7 @@ static int xfrm_output_gso(struct sk_buff *skb)
int err;
segs->next = NULL;
- err = xfrm_output2(segs);
+ err = xfrm_output2(sk, segs);
if (unlikely(err)) {
kfree_skb_list(nskb);
@@ -178,13 +178,13 @@ static int xfrm_output_gso(struct sk_buff *skb)
return 0;
}
-int xfrm_output(struct sk_buff *skb)
+int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
int err;
if (skb_is_gso(skb))
- return xfrm_output_gso(skb);
+ return xfrm_output_gso(sk, skb);
if (skb->ip_summed == CHECKSUM_PARTIAL) {
err = skb_checksum_help(skb);
@@ -195,7 +195,7 @@ int xfrm_output(struct sk_buff *skb)
}
}
- return xfrm_output2(skb);
+ return xfrm_output2(sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 7de2ed9ec46d..2091664295ba 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2423,6 +2423,11 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
const struct xfrm_link *link;
int type, err;
+#ifdef CONFIG_COMPAT
+ if (is_compat_task())
+ return -ENOTSUPP;
+#endif
+
type = nlh->nlmsg_type;
if (type > XFRM_MSG_MAX)
return -EINVAL;