summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c6
-rw-r--r--net/bridge/br_input.c2
-rw-r--r--net/bridge/br_mdb.c54
-rw-r--r--net/bridge/br_multicast.c18
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/bridge/netfilter/ebtables.c3
-rw-r--r--net/core/flow_dissector.c30
-rw-r--r--net/core/pktgen.c14
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c1
-rw-r--r--net/core/sock.c32
-rw-r--r--net/decnet/dn_table.c1
-rw-r--r--net/dsa/Kconfig3
-rw-r--r--net/dsa/Makefile1
-rw-r--r--net/dsa/dsa.c3
-rw-r--r--net/dsa/dsa2.c585
-rw-r--r--net/dsa/dsa_priv.h9
-rw-r--r--net/dsa/legacy.c22
-rw-r--r--net/dsa/master.c30
-rw-r--r--net/dsa/port.c16
-rw-r--r--net/dsa/slave.c20
-rw-r--r--net/dsa/switch.c6
-rw-r--r--net/dsa/tag_brcm.c70
-rw-r--r--net/dsa/tag_dsa.c2
-rw-r--r--net/dsa/tag_edsa.c2
-rw-r--r--net/dsa/tag_lan9303.c15
-rw-r--r--net/ipv4/ip_gre.c58
-rw-r--r--net/ipv4/netfilter/arp_tables.c22
-rw-r--r--net/ipv4/netfilter/ip_tables.c23
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c28
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/sysctl_net_ipv4.c32
-rw-r--r--net/ipv4/tcp.c24
-rw-r--r--net/ipv4/tcp_input.c227
-rw-r--r--net/ipv4/tcp_ipv4.c13
-rw-r--r--net/ipv4/tcp_metrics.c4
-rw-r--r--net/ipv4/tcp_minisocks.c6
-rw-r--r--net/ipv4/tcp_offload.c12
-rw-r--r--net/ipv4/tcp_output.c24
-rw-r--r--net/ipv6/addrconf.c23
-rw-r--r--net/ipv6/ila/ila.h12
-rw-r--r--net/ipv6/ila/ila_common.c104
-rw-r--r--net/ipv6/ila/ila_lwt.c111
-rw-r--r--net/ipv6/ila/ila_xlat.c26
-rw-r--r--net/ipv6/ip6mr.c4
-rw-r--r--net/ipv6/ndisc.c9
-rw-r--r--net/ipv6/netfilter/ip6_tables.c22
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c24
-rw-r--r--net/ipv6/output_core.c31
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/l2tp/l2tp_core.c2
-rw-r--r--net/l2tp/l2tp_eth.c2
-rw-r--r--net/l2tp/l2tp_ip.c24
-rw-r--r--net/l2tp/l2tp_ip6.c24
-rw-r--r--net/l2tp/l2tp_ppp.c76
-rw-r--r--net/ncsi/ncsi-aen.c15
-rw-r--r--net/ncsi/ncsi-manage.c76
-rw-r--r--net/ncsi/ncsi-rsp.c41
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c2
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c5
-rw-r--r--net/netfilter/ipset/pfxlen.c395
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c8
-rw-r--r--net/netfilter/nf_conntrack_core.c15
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c81
-rw-r--r--net/netfilter/nf_conntrack_netlink.c10
-rw-r--r--net/netfilter/nf_conntrack_proto.c86
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c21
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c62
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c41
-rw-r--r--net/netfilter/nf_nat_core.c9
-rw-r--r--net/netfilter/nf_tables_api.c195
-rw-r--r--net/netfilter/nft_ct.c39
-rw-r--r--net/netfilter/nft_set_bitmap.c18
-rw-r--r--net/netfilter/nft_set_hash.c41
-rw-r--r--net/netfilter/nft_set_rbtree.c73
-rw-r--r--net/netfilter/x_tables.c21
-rw-r--r--net/netfilter/xt_connlimit.c55
-rw-r--r--net/netlink/af_netlink.c17
-rw-r--r--net/netlink/af_netlink.h1
-rw-r--r--net/nsh/nsh.c60
-rw-r--r--net/openvswitch/Kconfig1
-rw-r--r--net/openvswitch/Makefile1
-rw-r--r--net/openvswitch/actions.c122
-rw-r--r--net/openvswitch/datapath.c43
-rw-r--r--net/openvswitch/datapath.h35
-rw-r--r--net/openvswitch/flow.c51
-rw-r--r--net/openvswitch/flow.h7
-rw-r--r--net/openvswitch/flow_netlink.c349
-rw-r--r--net/openvswitch/flow_netlink.h5
-rw-r--r--net/openvswitch/meter.c604
-rw-r--r--net/openvswitch/meter.h54
-rw-r--r--net/qrtr/qrtr.c2
-rw-r--r--net/rds/ib_recv.c10
-rw-r--r--net/sched/act_api.c2
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_connmark.c2
-rw-r--r--net/sched/act_csum.c2
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_ife.c2
-rw-r--r--net/sched/act_ipt.c4
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c2
-rw-r--r--net/sched/act_police.c2
-rw-r--r--net/sched/act_sample.c2
-rw-r--r--net/sched/act_simple.c2
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/act_tunnel_key.c2
-rw-r--r--net/sched/act_vlan.c83
-rw-r--r--net/sched/cls_api.c1
-rw-r--r--net/sched/cls_basic.c20
-rw-r--r--net/sched/cls_bpf.c7
-rw-r--r--net/sched/cls_cgroup.c24
-rw-r--r--net/sched/cls_flow.c24
-rw-r--r--net/sched/cls_flower.c16
-rw-r--r--net/sched/cls_fw.c17
-rw-r--r--net/sched/cls_matchall.c15
-rw-r--r--net/sched/cls_route.c17
-rw-r--r--net/sched/cls_rsvp.h15
-rw-r--r--net/sched/cls_tcindex.c33
-rw-r--r--net/sched/cls_u32.c8
-rw-r--r--net/sched/sch_cbs.c4
-rw-r--r--net/sched/sch_mqprio.c5
-rw-r--r--net/sched/sch_netem.c140
-rw-r--r--net/sched/sch_red.c83
-rw-r--r--net/switchdev/switchdev.c2
-rw-r--r--net/tipc/link.c26
-rw-r--r--net/tipc/msg.h10
-rw-r--r--net/xfrm/xfrm_input.c4
-rw-r--r--net/xfrm/xfrm_policy.c71
137 files changed, 3559 insertions, 1704 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index a0103500cc6d..8dfdd94e430f 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -379,6 +379,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
dev->name);
vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
}
+ if (event == NETDEV_DOWN &&
+ (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
vlan_info = rtnl_dereference(dev->vlan_info);
if (!vlan_info)
@@ -426,9 +429,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
struct net_device *tmp;
LIST_HEAD(close_list);
- if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
- vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
-
/* Put all VLANs for this dev in the down state too. */
vlan_group_for_each_dev(grp, i, vlandev) {
flgs = vlandev->flags;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index a096d3e189da..7f98a7d25866 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -137,7 +137,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
mdst = br_mdb_get(br, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
br_multicast_querier_exists(br, eth_hdr(skb))) {
- if ((mdst && mdst->mglist) ||
+ if ((mdst && mdst->host_joined) ||
br_multicast_is_router(br)) {
local_rcv = true;
br->dev->stats.multicast++;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 31ddff22563e..b0f4c734900b 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -292,6 +292,46 @@ err:
kfree(priv);
}
+static void br_mdb_switchdev_host_port(struct net_device *dev,
+ struct net_device *lower_dev,
+ struct br_mdb_entry *entry, int type)
+{
+ struct switchdev_obj_port_mdb mdb = {
+ .obj = {
+ .id = SWITCHDEV_OBJ_ID_HOST_MDB,
+ .flags = SWITCHDEV_F_DEFER,
+ },
+ .vid = entry->vid,
+ };
+
+ if (entry->addr.proto == htons(ETH_P_IP))
+ ip_eth_mc_map(entry->addr.u.ip4, mdb.addr);
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr);
+#endif
+
+ mdb.obj.orig_dev = dev;
+ switch (type) {
+ case RTM_NEWMDB:
+ switchdev_port_obj_add(lower_dev, &mdb.obj);
+ break;
+ case RTM_DELMDB:
+ switchdev_port_obj_del(lower_dev, &mdb.obj);
+ break;
+ }
+}
+
+static void br_mdb_switchdev_host(struct net_device *dev,
+ struct br_mdb_entry *entry, int type)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter)
+ br_mdb_switchdev_host_port(dev, lower_dev, entry, type);
+}
+
static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
struct br_mdb_entry *entry, int type)
{
@@ -317,7 +357,7 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
#endif
mdb.obj.orig_dev = port_dev;
- if (port_dev && type == RTM_NEWMDB) {
+ if (p && port_dev && type == RTM_NEWMDB) {
complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
if (complete_info) {
complete_info->port = p;
@@ -327,10 +367,13 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
if (switchdev_port_obj_add(port_dev, &mdb.obj))
kfree(complete_info);
}
- } else if (port_dev && type == RTM_DELMDB) {
+ } else if (p && port_dev && type == RTM_DELMDB) {
switchdev_port_obj_del(port_dev, &mdb.obj);
}
+ if (!p)
+ br_mdb_switchdev_host(dev, entry, type);
+
skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
if (!skb)
goto errout;
@@ -353,7 +396,10 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
struct br_mdb_entry entry;
memset(&entry, 0, sizeof(entry));
- entry.ifindex = port->dev->ifindex;
+ if (port)
+ entry.ifindex = port->dev->ifindex;
+ else
+ entry.ifindex = dev->ifindex;
entry.addr.proto = group->proto;
entry.addr.u.ip4 = group->u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
@@ -655,7 +701,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
call_rcu_bh(&p->rcu, br_multicast_free_pg);
err = 0;
- if (!mp->ports && !mp->mglist &&
+ if (!mp->ports && !mp->host_joined &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
break;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 5f7f0e9d446c..cb4729539b82 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -249,7 +249,8 @@ static void br_multicast_group_expired(struct timer_list *t)
if (!netif_running(br->dev) || timer_pending(&mp->timer))
goto out;
- mp->mglist = false;
+ mp->host_joined = false;
+ br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
if (mp->ports)
goto out;
@@ -292,7 +293,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
p->flags);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
- if (!mp->ports && !mp->mglist &&
+ if (!mp->ports && !mp->host_joined &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
@@ -773,7 +774,10 @@ static int br_multicast_add_group(struct net_bridge *br,
goto err;
if (!port) {
- mp->mglist = true;
+ if (!mp->host_joined) {
+ mp->host_joined = true;
+ br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0);
+ }
mod_timer(&mp->timer, now + br->multicast_membership_interval);
goto out;
}
@@ -1477,7 +1481,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
max_delay *= br->multicast_last_member_count;
- if (mp->mglist &&
+ if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1561,7 +1565,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
goto out;
max_delay *= br->multicast_last_member_count;
- if (mp->mglist &&
+ if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1622,7 +1626,7 @@ br_multicast_leave_group(struct net_bridge *br,
br_mdb_notify(br->dev, port, group, RTM_DELMDB,
p->flags);
- if (!mp->ports && !mp->mglist &&
+ if (!mp->ports && !mp->host_joined &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
}
@@ -1662,7 +1666,7 @@ br_multicast_leave_group(struct net_bridge *br,
br->multicast_last_member_interval;
if (!port) {
- if (mp->mglist &&
+ if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, time) :
try_to_del_timer_sync(&mp->timer) >= 0)) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 40553d832b6e..1312b8d20ec3 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -209,7 +209,7 @@ struct net_bridge_mdb_entry
struct rcu_head rcu;
struct timer_list timer;
struct br_ip addr;
- bool mglist;
+ bool host_joined;
};
struct net_bridge_mdb_htable
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 3b3dcf719e07..37817d25b63d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2112,9 +2112,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
for (i = 0, j = 1 ; j < 4 ; j++, i++) {
struct compat_ebt_entry_mwt *match32;
unsigned int size;
- char *buf = buf_start;
+ char *buf = buf_start + offsets[i];
- buf = buf_start + offsets[i];
if (offsets[i] > offsets[j])
return -EINVAL;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1f5caafb4492..15ce30063765 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -10,6 +10,7 @@
#include <net/ipv6.h>
#include <net/gre.h>
#include <net/pptp.h>
+#include <net/tipc.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
@@ -772,23 +773,22 @@ proto_again:
break;
}
case htons(ETH_P_TIPC): {
- struct {
- __be32 pre[3];
- __be32 srcnode;
- } *hdr, _hdr;
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ struct tipc_basic_hdr *hdr, _hdr;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
+ data, hlen, &_hdr);
if (!hdr) {
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
}
if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
+ FLOW_DISSECTOR_KEY_TIPC)) {
key_addrs = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_TIPC_ADDRS,
+ FLOW_DISSECTOR_KEY_TIPC,
target_container);
- key_addrs->tipcaddrs.srcnode = hdr->srcnode;
- key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
+ key_addrs->tipckey.key = tipc_hdr_rps_key(hdr);
+ key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC;
}
fdret = FLOW_DISSECT_RET_OUT_GOOD;
break;
@@ -1024,8 +1024,8 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
diff -= sizeof(flow->addrs.v6addrs);
break;
- case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
- diff -= sizeof(flow->addrs.tipcaddrs);
+ case FLOW_DISSECTOR_KEY_TIPC:
+ diff -= sizeof(flow->addrs.tipckey);
break;
}
return (sizeof(*flow) - diff) / sizeof(u32);
@@ -1039,8 +1039,8 @@ __be32 flow_get_u32_src(const struct flow_keys *flow)
case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
return (__force __be32)ipv6_addr_hash(
&flow->addrs.v6addrs.src);
- case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
- return flow->addrs.tipcaddrs.srcnode;
+ case FLOW_DISSECTOR_KEY_TIPC:
+ return flow->addrs.tipckey.key;
default:
return 0;
}
@@ -1321,8 +1321,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
.offset = offsetof(struct flow_keys, addrs.v6addrs),
},
{
- .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS,
- .offset = offsetof(struct flow_keys, addrs.tipcaddrs),
+ .key_id = FLOW_DISSECTOR_KEY_TIPC,
+ .offset = offsetof(struct flow_keys, addrs.tipckey),
},
{
.key_id = FLOW_DISSECTOR_KEY_PORTS,
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index e3fa53a07d34..40db0b7e37ac 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2711,7 +2711,7 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
int datalen)
{
- struct timeval timestamp;
+ struct timespec64 timestamp;
struct pktgen_hdr *pgh;
pgh = skb_put(skb, sizeof(*pgh));
@@ -2773,9 +2773,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
pgh->tv_sec = 0;
pgh->tv_usec = 0;
} else {
- do_gettimeofday(&timestamp);
+ /*
+ * pgh->tv_sec wraps in y2106 when interpreted as unsigned
+ * as done by wireshark, or y2038 when interpreted as signed.
+ * This is probably harmless, but if anyone wants to improve
+ * it, we could introduce a variant that puts 64-bit nanoseconds
+ * into the respective header bytes.
+ * This would also be slightly faster to read.
+ */
+ ktime_get_real_ts64(&timestamp);
pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
+ pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC);
}
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dc5ad84ac096..dabba2a91fc8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -920,7 +920,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(4) /* IFLA_EVENT */
+ nla_total_size(4) /* IFLA_NEW_NETNSID */
- + nla_total_size(1); /* IFLA_PROTO_DOWN */
+ + nla_total_size(1) /* IFLA_PROTO_DOWN */
+ nla_total_size(4) /* IFLA_IF_NETNSID */
+ 0;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 97e604d55d55..8134c00df6c2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4869,6 +4869,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
if (!xnet)
return;
+ ipvs_reset(skb);
skb_orphan(skb);
skb->mark = 0;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 759400053110..57bbd6040eb6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2346,16 +2346,18 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
/* guarantee minimum buffer size under pressure */
if (kind == SK_MEM_RECV) {
- if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
+ if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
return 1;
} else { /* SK_MEM_SEND */
+ int wmem0 = sk_get_wmem0(sk, prot);
+
if (sk->sk_type == SOCK_STREAM) {
- if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
+ if (sk->sk_wmem_queued < wmem0)
return 1;
- } else if (refcount_read(&sk->sk_wmem_alloc) <
- prot->sysctl_wmem[0])
+ } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
return 1;
+ }
}
if (sk_has_memory_pressure(sk)) {
@@ -3042,7 +3044,6 @@ struct prot_inuse {
static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
-#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
@@ -3086,27 +3087,6 @@ static __init int net_inuse_init(void)
}
core_initcall(net_inuse_init);
-#else
-static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
-
-void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
-{
- __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
-
-int sock_prot_inuse_get(struct net *net, struct proto *prot)
-{
- int cpu, idx = prot->inuse_idx;
- int res = 0;
-
- for_each_possible_cpu(cpu)
- res += per_cpu(prot_inuse, cpu).val[idx];
-
- return res >= 0 ? res : 0;
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
-#endif
static void assign_proto_idx(struct proto *prot)
{
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 08667f68e601..f0710b5d037d 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -156,6 +156,7 @@ static void dn_rehash_zone(struct dn_zone *dz)
default:
printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n",
old_divisor);
+ /* fall through */
case 256:
new_divisor = 1024;
new_hashmask = 0x3FF;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index cc5f8f971689..2fed892094bc 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -19,6 +19,9 @@ if NET_DSA
config NET_DSA_TAG_BRCM
bool
+config NET_DSA_TAG_BRCM_PREPEND
+ bool
+
config NET_DSA_TAG_DSA
bool
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index e9a4a0f33e86..0e13c1f95d13 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -5,6 +5,7 @@ dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
+dsa_core-$(CONFIG_NET_DSA_TAG_BRCM_PREPEND) += tag_brcm.o
dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index b8f2d9f7c3ed..6a9d0f50fbee 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -44,6 +44,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
#ifdef CONFIG_NET_DSA_TAG_BRCM
[DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
#endif
+#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
+ [DSA_TAG_PROTO_BRCM_PREPEND] = &brcm_prepend_netdev_ops,
+#endif
#ifdef CONFIG_NET_DSA_TAG_DSA
[DSA_TAG_PROTO_DSA] = &dsa_netdev_ops,
#endif
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 283104e5ca6a..44e3fb7dec8c 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -94,14 +94,6 @@ static void dsa_tree_put(struct dsa_switch_tree *dst)
kref_put(&dst->refcount, dsa_tree_release);
}
-/* For platform data configurations, we need to have a valid name argument to
- * differentiate a disabled port from an enabled one
- */
-static bool dsa_port_is_valid(struct dsa_port *port)
-{
- return port->type != DSA_PORT_TYPE_UNUSED;
-}
-
static bool dsa_port_is_dsa(struct dsa_port *port)
{
return port->type == DSA_PORT_TYPE_DSA;
@@ -112,197 +104,214 @@ static bool dsa_port_is_cpu(struct dsa_port *port)
return port->type == DSA_PORT_TYPE_CPU;
}
-static bool dsa_ds_find_port_dn(struct dsa_switch *ds,
- struct device_node *port)
+static bool dsa_port_is_user(struct dsa_port *dp)
{
- u32 index;
-
- for (index = 0; index < ds->num_ports; index++)
- if (ds->ports[index].dn == port)
- return true;
- return false;
+ return dp->type == DSA_PORT_TYPE_USER;
}
-static struct dsa_switch *dsa_dst_find_port_dn(struct dsa_switch_tree *dst,
- struct device_node *port)
+static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
+ struct device_node *dn)
{
struct dsa_switch *ds;
- u32 index;
+ struct dsa_port *dp;
+ int device, port;
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
if (!ds)
continue;
- if (dsa_ds_find_port_dn(ds, port))
- return ds;
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
+
+ if (dp->dn == dn)
+ return dp;
+ }
}
return NULL;
}
-static int dsa_port_complete(struct dsa_switch_tree *dst,
- struct dsa_switch *src_ds,
- struct dsa_port *port,
- u32 src_port)
+static bool dsa_port_setup_routing_table(struct dsa_port *dp)
{
- struct device_node *link;
- int index;
- struct dsa_switch *dst_ds;
-
- for (index = 0;; index++) {
- link = of_parse_phandle(port->dn, "link", index);
- if (!link)
- break;
-
- dst_ds = dsa_dst_find_port_dn(dst, link);
- of_node_put(link);
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
+ struct device_node *dn = dp->dn;
+ struct of_phandle_iterator it;
+ struct dsa_port *link_dp;
+ int err;
- if (!dst_ds)
- return 1;
+ of_for_each_phandle(&it, err, dn, "link", NULL, 0) {
+ link_dp = dsa_tree_find_port_by_node(dst, it.node);
+ if (!link_dp) {
+ of_node_put(it.node);
+ return false;
+ }
- src_ds->rtable[dst_ds->index] = src_port;
+ ds->rtable[link_dp->ds->index] = dp->index;
}
- return 0;
+ return true;
}
-/* A switch is complete if all the DSA ports phandles point to ports
- * known in the tree. A return value of 1 means the tree is not
- * complete. This is not an error condition. A value of 0 is
- * success.
- */
-static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static bool dsa_switch_setup_routing_table(struct dsa_switch *ds)
{
- struct dsa_port *port;
- u32 index;
- int err;
+ bool complete = true;
+ struct dsa_port *dp;
+ int i;
- for (index = 0; index < ds->num_ports; index++) {
- port = &ds->ports[index];
- if (!dsa_port_is_valid(port))
- continue;
+ for (i = 0; i < DSA_MAX_SWITCHES; i++)
+ ds->rtable[i] = DSA_RTABLE_NONE;
- if (!dsa_port_is_dsa(port))
- continue;
+ for (i = 0; i < ds->num_ports; i++) {
+ dp = &ds->ports[i];
- err = dsa_port_complete(dst, ds, port, index);
- if (err != 0)
- return err;
+ if (dsa_port_is_dsa(dp)) {
+ complete = dsa_port_setup_routing_table(dp);
+ if (!complete)
+ break;
+ }
}
- return 0;
+ return complete;
}
-/* A tree is complete if all the DSA ports phandles point to ports
- * known in the tree. A return value of 1 means the tree is not
- * complete. This is not an error condition. A value of 0 is
- * success.
- */
-static int dsa_dst_complete(struct dsa_switch_tree *dst)
+static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
{
struct dsa_switch *ds;
- u32 index;
- int err;
+ bool complete = true;
+ int device;
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
if (!ds)
continue;
- err = dsa_ds_complete(dst, ds);
- if (err != 0)
- return err;
+ complete = dsa_switch_setup_routing_table(ds);
+ if (!complete)
+ break;
}
- return 0;
+ return complete;
}
-static int dsa_dsa_port_apply(struct dsa_port *port)
+static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds = port->ds;
- int err;
+ struct dsa_switch *ds;
+ struct dsa_port *dp;
+ int device, port;
- err = dsa_port_fixed_link_register_of(port);
- if (err) {
- dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n",
- port->index, err);
- return err;
- }
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
+ if (!ds)
+ continue;
- memset(&port->devlink_port, 0, sizeof(port->devlink_port));
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
- return devlink_port_register(ds->devlink, &port->devlink_port,
- port->index);
-}
+ if (dsa_port_is_cpu(dp))
+ return dp;
+ }
+ }
-static void dsa_dsa_port_unapply(struct dsa_port *port)
-{
- devlink_port_unregister(&port->devlink_port);
- dsa_port_fixed_link_unregister_of(port);
+ return NULL;
}
-static int dsa_cpu_port_apply(struct dsa_port *port)
+static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds = port->ds;
- int err;
+ struct dsa_switch *ds;
+ struct dsa_port *dp;
+ int device, port;
- err = dsa_port_fixed_link_register_of(port);
- if (err) {
- dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n",
- port->index, err);
- return err;
+ /* DSA currently only supports a single CPU port */
+ dst->cpu_dp = dsa_tree_find_first_cpu(dst);
+ if (!dst->cpu_dp) {
+ pr_warn("Tree has no master device\n");
+ return -EINVAL;
}
- memset(&port->devlink_port, 0, sizeof(port->devlink_port));
- err = devlink_port_register(ds->devlink, &port->devlink_port,
- port->index);
- return err;
+ /* Assign the default CPU port to all ports of the fabric */
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
+ if (!ds)
+ continue;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
+
+ if (dsa_port_is_user(dp))
+ dp->cpu_dp = dst->cpu_dp;
+ }
+ }
+
+ return 0;
}
-static void dsa_cpu_port_unapply(struct dsa_port *port)
+static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
{
- devlink_port_unregister(&port->devlink_port);
- dsa_port_fixed_link_unregister_of(port);
+ /* DSA currently only supports a single CPU port */
+ dst->cpu_dp = NULL;
}
-static int dsa_user_port_apply(struct dsa_port *port)
+static int dsa_port_setup(struct dsa_port *dp)
{
- struct dsa_switch *ds = port->ds;
+ struct dsa_switch *ds = dp->ds;
int err;
- err = dsa_slave_create(port);
- if (err) {
- dev_warn(ds->dev, "Failed to create slave %d: %d\n",
- port->index, err);
- port->slave = NULL;
- return err;
- }
+ memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
- memset(&port->devlink_port, 0, sizeof(port->devlink_port));
- err = devlink_port_register(ds->devlink, &port->devlink_port,
- port->index);
+ err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index);
if (err)
return err;
- devlink_port_type_eth_set(&port->devlink_port, port->slave);
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ break;
+ case DSA_PORT_TYPE_CPU:
+ case DSA_PORT_TYPE_DSA:
+ err = dsa_port_fixed_link_register_of(dp);
+ if (err) {
+ dev_err(ds->dev, "failed to register fixed link for port %d.%d\n",
+ ds->index, dp->index);
+ return err;
+ }
+
+ break;
+ case DSA_PORT_TYPE_USER:
+ err = dsa_slave_create(dp);
+ if (err)
+ dev_err(ds->dev, "failed to create slave for port %d.%d\n",
+ ds->index, dp->index);
+ else
+ devlink_port_type_eth_set(&dp->devlink_port, dp->slave);
+ break;
+ }
return 0;
}
-static void dsa_user_port_unapply(struct dsa_port *port)
+static void dsa_port_teardown(struct dsa_port *dp)
{
- devlink_port_unregister(&port->devlink_port);
- if (port->slave) {
- dsa_slave_destroy(port->slave);
- port->slave = NULL;
+ devlink_port_unregister(&dp->devlink_port);
+
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ break;
+ case DSA_PORT_TYPE_CPU:
+ case DSA_PORT_TYPE_DSA:
+ dsa_port_fixed_link_unregister_of(dp);
+ break;
+ case DSA_PORT_TYPE_USER:
+ if (dp->slave) {
+ dsa_slave_destroy(dp->slave);
+ dp->slave = NULL;
+ }
+ break;
}
}
-static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static int dsa_switch_setup(struct dsa_switch *ds)
{
- struct dsa_port *port;
- u32 index;
int err;
/* Initialize ds->phys_mii_mask before registering the slave MDIO bus
@@ -343,136 +352,145 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
return err;
}
- for (index = 0; index < ds->num_ports; index++) {
- port = &ds->ports[index];
- if (!dsa_port_is_valid(port))
- continue;
+ return 0;
+}
- if (dsa_port_is_dsa(port)) {
- err = dsa_dsa_port_apply(port);
- if (err)
- return err;
+static void dsa_switch_teardown(struct dsa_switch *ds)
+{
+ if (ds->slave_mii_bus && ds->ops->phy_read)
+ mdiobus_unregister(ds->slave_mii_bus);
+
+ dsa_switch_unregister_notifier(ds);
+
+ if (ds->devlink) {
+ devlink_unregister(ds->devlink);
+ devlink_free(ds->devlink);
+ ds->devlink = NULL;
+ }
+
+}
+
+static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
+{
+ struct dsa_switch *ds;
+ struct dsa_port *dp;
+ int device, port;
+ int err;
+
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
+ if (!ds)
continue;
- }
- if (dsa_port_is_cpu(port)) {
- err = dsa_cpu_port_apply(port);
+ err = dsa_switch_setup(ds);
+ if (err)
+ return err;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
+
+ err = dsa_port_setup(dp);
if (err)
return err;
- continue;
}
-
- err = dsa_user_port_apply(port);
- if (err)
- continue;
}
return 0;
}
-static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
{
- struct dsa_port *port;
- u32 index;
+ struct dsa_switch *ds;
+ struct dsa_port *dp;
+ int device, port;
- for (index = 0; index < ds->num_ports; index++) {
- port = &ds->ports[index];
- if (!dsa_port_is_valid(port))
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
+ if (!ds)
continue;
- if (dsa_port_is_dsa(port)) {
- dsa_dsa_port_unapply(port);
- continue;
- }
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
- if (dsa_port_is_cpu(port)) {
- dsa_cpu_port_unapply(port);
- continue;
+ dsa_port_teardown(dp);
}
- dsa_user_port_unapply(port);
+ dsa_switch_teardown(ds);
}
+}
- if (ds->slave_mii_bus && ds->ops->phy_read)
- mdiobus_unregister(ds->slave_mii_bus);
+static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp = dst->cpu_dp;
+ struct net_device *master = cpu_dp->master;
- dsa_switch_unregister_notifier(ds);
+ /* DSA currently supports a single pair of CPU port and master device */
+ return dsa_master_setup(master, cpu_dp);
+}
- if (ds->devlink) {
- devlink_unregister(ds->devlink);
- devlink_free(ds->devlink);
- ds->devlink = NULL;
- }
+static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp = dst->cpu_dp;
+ struct net_device *master = cpu_dp->master;
+ return dsa_master_teardown(master);
}
-static int dsa_dst_apply(struct dsa_switch_tree *dst)
+static int dsa_tree_setup(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds;
- u32 index;
+ bool complete;
int err;
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
- if (!ds)
- continue;
-
- err = dsa_ds_apply(dst, ds);
- if (err)
- return err;
+ if (dst->setup) {
+ pr_err("DSA: tree %d already setup! Disjoint trees?\n",
+ dst->index);
+ return -EEXIST;
}
- /* If we use a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point on get
- * sent to the tag format's receive function.
- */
- wmb();
- dst->cpu_dp->master->dsa_ptr = dst->cpu_dp;
+ complete = dsa_tree_setup_routing_table(dst);
+ if (!complete)
+ return 0;
+
+ err = dsa_tree_setup_default_cpu(dst);
+ if (err)
+ return err;
- err = dsa_master_ethtool_setup(dst->cpu_dp->master);
+ err = dsa_tree_setup_switches(dst);
if (err)
return err;
- dst->applied = true;
+ err = dsa_tree_setup_master(dst);
+ if (err)
+ return err;
+
+ dst->setup = true;
+
+ pr_info("DSA: tree %d setup\n", dst->index);
return 0;
}
-static void dsa_dst_unapply(struct dsa_switch_tree *dst)
+static void dsa_tree_teardown(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds;
- u32 index;
-
- if (!dst->applied)
+ if (!dst->setup)
return;
- dsa_master_ethtool_restore(dst->cpu_dp->master);
+ dsa_tree_teardown_master(dst);
- dst->cpu_dp->master->dsa_ptr = NULL;
+ dsa_tree_teardown_switches(dst);
- /* If we used a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point get sent
- * without the tag and go through the regular receive path.
- */
- wmb();
+ dsa_tree_teardown_default_cpu(dst);
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
- if (!ds)
- continue;
+ pr_info("DSA: tree %d torn down\n", dst->index);
- dsa_ds_unapply(dst, ds);
- }
-
- dst->cpu_dp = NULL;
-
- pr_info("DSA: tree %d unapplied\n", dst->index);
- dst->applied = false;
+ dst->setup = false;
}
static void dsa_tree_remove_switch(struct dsa_switch_tree *dst,
unsigned int index)
{
+ dsa_tree_teardown(dst);
+
dst->ds[index] = NULL;
dsa_tree_put(dst);
}
@@ -481,6 +499,7 @@ static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
struct dsa_switch *ds)
{
unsigned int index = ds->index;
+ int err;
if (dst->ds[index])
return -EBUSY;
@@ -488,7 +507,11 @@ static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
dsa_tree_get(dst);
dst->ds[index] = ds;
- return 0;
+ err = dsa_tree_setup(dst);
+ if (err)
+ dsa_tree_remove_switch(dst, index);
+
+ return err;
}
static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
@@ -516,7 +539,7 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
- tag_protocol = ds->ops->get_tag_protocol(ds);
+ tag_protocol = ds->ops->get_tag_protocol(ds, dp->index);
tag_ops = dsa_resolve_tag_protocol(tag_protocol);
if (IS_ERR(tag_ops)) {
dev_warn(ds->dev, "No tagger for this switch\n");
@@ -532,86 +555,6 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
return 0;
}
-static int dsa_cpu_parse(struct dsa_port *port, u32 index,
- struct dsa_switch_tree *dst,
- struct dsa_switch *ds)
-{
- if (!dst->cpu_dp)
- dst->cpu_dp = port;
-
- return 0;
-}
-
-static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds)
-{
- struct dsa_port *port;
- u32 index;
- int err;
-
- for (index = 0; index < ds->num_ports; index++) {
- port = &ds->ports[index];
- if (!dsa_port_is_valid(port) ||
- dsa_port_is_dsa(port))
- continue;
-
- if (dsa_port_is_cpu(port)) {
- err = dsa_cpu_parse(port, index, dst, ds);
- if (err)
- return err;
- }
-
- }
-
- pr_info("DSA: switch %d %d parsed\n", dst->index, ds->index);
-
- return 0;
-}
-
-static int dsa_dst_parse(struct dsa_switch_tree *dst)
-{
- struct dsa_switch *ds;
- struct dsa_port *dp;
- u32 index;
- int port;
- int err;
-
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
- if (!ds)
- continue;
-
- err = dsa_ds_parse(dst, ds);
- if (err)
- return err;
- }
-
- if (!dst->cpu_dp) {
- pr_warn("Tree has no master device\n");
- return -EINVAL;
- }
-
- /* Assign the default CPU port to all ports of the fabric */
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
- if (!ds)
- continue;
-
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
- if (!dsa_port_is_valid(dp) ||
- dsa_port_is_dsa(dp) ||
- dsa_port_is_cpu(dp))
- continue;
-
- dp->cpu_dp = dst->cpu_dp;
- }
- }
-
- pr_info("DSA: tree %d parsed\n", dst->index);
-
- return 0;
-}
-
static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
{
struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0);
@@ -768,13 +711,18 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
return dsa_switch_parse_ports(ds, cd);
}
-static int _dsa_register_switch(struct dsa_switch *ds)
+static int dsa_switch_add(struct dsa_switch *ds)
+{
+ struct dsa_switch_tree *dst = ds->dst;
+
+ return dsa_tree_add_switch(dst, ds);
+}
+
+static int dsa_switch_probe(struct dsa_switch *ds)
{
struct dsa_chip_data *pdata = ds->dev->platform_data;
struct device_node *np = ds->dev->of_node;
- struct dsa_switch_tree *dst;
- unsigned int index;
- int i, err;
+ int err;
if (np)
err = dsa_switch_parse_of(ds, np);
@@ -786,46 +734,7 @@ static int _dsa_register_switch(struct dsa_switch *ds)
if (err)
return err;
- index = ds->index;
- dst = ds->dst;
-
- /* Initialize the routing table */
- for (i = 0; i < DSA_MAX_SWITCHES; ++i)
- ds->rtable[i] = DSA_RTABLE_NONE;
-
- err = dsa_tree_add_switch(dst, ds);
- if (err)
- return err;
-
- err = dsa_dst_complete(dst);
- if (err < 0)
- goto out_del_dst;
-
- /* Not all switches registered yet */
- if (err == 1)
- return 0;
-
- if (dst->applied) {
- pr_info("DSA: Disjoint trees?\n");
- return -EINVAL;
- }
-
- err = dsa_dst_parse(dst);
- if (err)
- goto out_del_dst;
-
- err = dsa_dst_apply(dst);
- if (err) {
- dsa_dst_unapply(dst);
- goto out_del_dst;
- }
-
- return 0;
-
-out_del_dst:
- dsa_tree_remove_switch(dst, index);
-
- return err;
+ return dsa_switch_add(ds);
}
struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
@@ -855,27 +764,25 @@ int dsa_register_switch(struct dsa_switch *ds)
int err;
mutex_lock(&dsa2_mutex);
- err = _dsa_register_switch(ds);
+ err = dsa_switch_probe(ds);
mutex_unlock(&dsa2_mutex);
return err;
}
EXPORT_SYMBOL_GPL(dsa_register_switch);
-static void _dsa_unregister_switch(struct dsa_switch *ds)
+static void dsa_switch_remove(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst = ds->dst;
unsigned int index = ds->index;
- dsa_dst_unapply(dst);
-
dsa_tree_remove_switch(dst, index);
}
void dsa_unregister_switch(struct dsa_switch *ds)
{
mutex_lock(&dsa2_mutex);
- _dsa_unregister_switch(ds);
+ dsa_switch_remove(ds);
mutex_unlock(&dsa2_mutex);
}
EXPORT_SYMBOL_GPL(dsa_unregister_switch);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 253a613c40cd..7d036696e8c4 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -108,8 +108,8 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
const unsigned char *addr, u16 vid);
/* master.c */
-int dsa_master_ethtool_setup(struct net_device *dev);
-void dsa_master_ethtool_restore(struct net_device *dev);
+int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
+void dsa_master_teardown(struct net_device *dev);
static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
int device, int port)
@@ -147,10 +147,10 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
-int dsa_port_mdb_add(struct dsa_port *dp,
+int dsa_port_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans);
-int dsa_port_mdb_del(struct dsa_port *dp,
+int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
@@ -191,6 +191,7 @@ void dsa_switch_unregister_notifier(struct dsa_switch *ds);
/* tag_brcm.c */
extern const struct dsa_device_ops brcm_netdev_ops;
+extern const struct dsa_device_ops brcm_prepend_netdev_ops;
/* tag_dsa.c */
extern const struct dsa_device_ops dsa_netdev_ops;
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 0511fe2feff7..84611d7fcfa2 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -151,7 +151,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
- tag_protocol = ops->get_tag_protocol(ds);
+ tag_protocol = ops->get_tag_protocol(ds, dst->cpu_dp->index);
tag_ops = dsa_resolve_tag_protocol(tag_protocol);
if (IS_ERR(tag_ops))
return PTR_ERR(tag_ops);
@@ -593,15 +593,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
if (!configured)
return -EPROBE_DEFER;
- /*
- * If we use a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point on get
- * sent to the tag format's receive function.
- */
- wmb();
- dev->dsa_ptr = dst->cpu_dp;
-
- return dsa_master_ethtool_setup(dst->cpu_dp->master);
+ return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp);
}
static int dsa_probe(struct platform_device *pdev)
@@ -666,15 +658,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
{
int i;
- dsa_master_ethtool_restore(dst->cpu_dp->master);
-
- dst->cpu_dp->master->dsa_ptr = NULL;
-
- /* If we used a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point get sent
- * without the tag and go through the regular receive path.
- */
- wmb();
+ dsa_master_teardown(dst->cpu_dp->master);
for (i = 0; i < dst->pd->nr_chips; i++) {
struct dsa_switch *ds = dst->ds[i];
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 5f3f57e372e0..00589147f042 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -85,7 +85,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
}
}
-int dsa_master_ethtool_setup(struct net_device *dev)
+static int dsa_master_ethtool_setup(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
struct dsa_switch *ds = cpu_dp->ds;
@@ -108,10 +108,36 @@ int dsa_master_ethtool_setup(struct net_device *dev)
return 0;
}
-void dsa_master_ethtool_restore(struct net_device *dev)
+static void dsa_master_ethtool_teardown(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
dev->ethtool_ops = cpu_dp->orig_ethtool_ops;
cpu_dp->orig_ethtool_ops = NULL;
}
+
+int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
+{
+ /* If we use a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point on get
+ * sent to the tag format's receive function.
+ */
+ wmb();
+
+ dev->dsa_ptr = cpu_dp;
+
+ return dsa_master_ethtool_setup(dev);
+}
+
+void dsa_master_teardown(struct net_device *dev)
+{
+ dsa_master_ethtool_teardown(dev);
+
+ dev->dsa_ptr = NULL;
+
+ /* If we used a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point get sent
+ * without the tag and go through the regular receive path.
+ */
+ wmb();
+}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bb30b1a7de3a..bb4be2679904 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -17,7 +17,7 @@
#include "dsa_priv.h"
-static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v)
+static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
{
struct raw_notifier_head *nh = &dp->ds->dst->nh;
int err;
@@ -215,7 +215,7 @@ int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
return ds->ops->port_fdb_dump(ds, port, cb, data);
}
-int dsa_port_mdb_add(struct dsa_port *dp,
+int dsa_port_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans)
{
@@ -229,7 +229,7 @@ int dsa_port_mdb_add(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
}
-int dsa_port_mdb_del(struct dsa_port *dp,
+int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb)
{
struct dsa_notifier_mdb_info info = {
@@ -252,7 +252,10 @@ int dsa_port_vlan_add(struct dsa_port *dp,
.vlan = vlan,
};
- return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
+ if (br_vlan_enabled(dp->bridge_dev))
+ return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
+
+ return 0;
}
int dsa_port_vlan_del(struct dsa_port *dp,
@@ -264,7 +267,10 @@ int dsa_port_vlan_del(struct dsa_port *dp,
.vlan = vlan,
};
- return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
+ if (br_vlan_enabled(dp->bridge_dev))
+ return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
+
+ return 0;
}
int dsa_port_fixed_link_register_of(struct dsa_port *dp)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 814ced75a0cc..d6e7a642493b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -304,6 +304,13 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
break;
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ /* DSA can directly translate this to a normal MDB add,
+ * but on the CPU port.
+ */
+ err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj),
+ trans);
+ break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
trans);
@@ -326,6 +333,12 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ /* DSA can directly translate this to a normal MDB add,
+ * but on the CPU port.
+ */
+ err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
+ break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
break;
@@ -342,11 +355,12 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
{
struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
- attr->u.ppid.id_len = sizeof(ds->index);
- memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len);
+ attr->u.ppid.id_len = sizeof(dst->index);
+ memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len);
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
attr->u.brport_flags_support = 0;
@@ -1147,7 +1161,7 @@ static void dsa_slave_notify(struct net_device *dev, unsigned long val)
int dsa_slave_create(struct dsa_port *port)
{
- struct dsa_port *cpu_dp = port->cpu_dp;
+ const struct dsa_port *cpu_dp = port->cpu_dp;
struct net_device *master = cpu_dp->master;
struct dsa_switch *ds = port->ds;
const char *name = port->name;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index e6c06aa349a6..29608d087a7c 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -121,7 +121,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
if (ds->index == info->sw_index)
set_bit(info->port, group);
for (port = 0; port < ds->num_ports; port++)
- if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+ if (dsa_is_dsa_port(ds, port))
set_bit(port, group);
if (switchdev_trans_ph_prepare(trans)) {
@@ -133,6 +133,8 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
if (err)
return err;
}
+
+ return 0;
}
for_each_set_bit(port, group, ds->num_ports)
@@ -180,6 +182,8 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
if (err)
return err;
}
+
+ return 0;
}
for_each_set_bit(port, members, ds->num_ports)
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 9e082bae3cb0..e6e0b7b6025c 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -59,7 +59,9 @@
#define BRCM_EG_TC_MASK 0x7
#define BRCM_EG_PID_MASK 0x1f
-static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
+ struct net_device *dev,
+ unsigned int offset)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
u16 queue = skb_get_queue_mapping(skb);
@@ -70,10 +72,10 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
skb_push(skb, BRCM_TAG_LEN);
- memmove(skb->data, skb->data + BRCM_TAG_LEN, 2 * ETH_ALEN);
+ if (offset)
+ memmove(skb->data, skb->data + BRCM_TAG_LEN, offset);
- /* Build the tag after the MAC Source Address */
- brcm_tag = skb->data + 2 * ETH_ALEN;
+ brcm_tag = skb->data + offset;
/* Set the ingress opcode, traffic class, tag enforcment is
* deprecated
@@ -94,8 +96,10 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
return skb;
}
-static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
+ struct net_device *dev,
+ struct packet_type *pt,
+ unsigned int offset)
{
int source_port;
u8 *brcm_tag;
@@ -103,8 +107,7 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
return NULL;
- /* skb->data points to the EtherType, the tag is right before it */
- brcm_tag = skb->data - 2;
+ brcm_tag = skb->data - offset;
/* The opcode should never be different than 0b000 */
if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK))
@@ -126,15 +129,60 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_TAG_LEN);
+ return skb;
+}
+
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* Build the tag after the MAC Source Address */
+ return brcm_tag_xmit_ll(skb, dev, 2 * ETH_ALEN);
+}
+
+
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt)
+{
+ struct sk_buff *nskb;
+
+ /* skb->data points to the EtherType, the tag is right before it */
+ nskb = brcm_tag_rcv_ll(skb, dev, pt, 2);
+ if (!nskb)
+ return nskb;
+
/* Move the Ethernet DA and SA */
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - BRCM_TAG_LEN,
+ memmove(nskb->data - ETH_HLEN,
+ nskb->data - ETH_HLEN - BRCM_TAG_LEN,
2 * ETH_ALEN);
- return skb;
+ return nskb;
}
const struct dsa_device_ops brcm_netdev_ops = {
.xmit = brcm_tag_xmit,
.rcv = brcm_tag_rcv,
};
+#endif
+
+#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
+static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* tag is prepended to the packet */
+ return brcm_tag_xmit_ll(skb, dev, 0);
+}
+
+static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
+ struct net_device *dev,
+ struct packet_type *pt)
+{
+ /* tag is prepended to the packet */
+ return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
+}
+
+const struct dsa_device_ops brcm_prepend_netdev_ops = {
+ .xmit = brcm_tag_xmit_prepend,
+ .rcv = brcm_tag_rcv_prepend,
+};
+#endif
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index dbbcdafed8c3..cd13cfc542ce 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -141,6 +141,8 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
+ skb->offload_fwd_mark = 1;
+
return skb;
}
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index f38a626b3a05..4083326b806e 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -160,6 +160,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
+ skb->offload_fwd_mark = 1;
+
return skb;
}
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index e526c8967b98..b8c5e52b2eff 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -88,10 +88,12 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
}
static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+ struct packet_type *pt)
{
u16 *lan9303_tag;
unsigned int source_port;
+ u16 ether_type_nw;
+ u8 ip_protocol;
if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) {
dev_warn_ratelimited(&dev->dev,
@@ -129,6 +131,17 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
skb->offload_fwd_mark = !ether_addr_equal(skb->data - ETH_HLEN,
eth_stp_addr);
+ /* We also need IGMP packets to have skb->offload_fwd_mark = 0.
+ * Solving this for all conceivable situations would add more cost to
+ * every packet. Instead we handle just the common case:
+ * No VLAN tag + Ethernet II framing.
+ * Test least probable term first.
+ */
+ ether_type_nw = lan9303_tag[2];
+ ip_protocol = *(skb->data + 9);
+ if (ip_protocol == IPPROTO_IGMP && ether_type_nw == htons(ETH_P_IP))
+ skb->offload_fwd_mark = 0;
+
return skb;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c105a315b1a3..bb6239169b1a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -773,20 +773,46 @@ free_skb:
return NETDEV_TX_OK;
}
+static void ipgre_link_update(struct net_device *dev, bool set_mtu)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int len;
+
+ len = tunnel->tun_hlen;
+ tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
+ len = tunnel->tun_hlen - len;
+ tunnel->hlen = tunnel->hlen + len;
+
+ dev->needed_headroom = dev->needed_headroom + len;
+ if (set_mtu)
+ dev->mtu = max_t(int, dev->mtu - len, 68);
+
+ if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
+ if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
+ tunnel->encap.type == TUNNEL_ENCAP_NONE) {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ }
+ dev->features |= NETIF_F_LLTX;
+ }
+}
+
static int ipgre_tunnel_ioctl(struct net_device *dev,
struct ifreq *ifr, int cmd)
{
- int err;
struct ip_tunnel_parm p;
+ int err;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
return -EFAULT;
+
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
- p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
- ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
+ p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
+ ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
}
+
p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
@@ -794,11 +820,22 @@ static int ipgre_tunnel_ioctl(struct net_device *dev,
if (err)
return err;
+ if (cmd == SIOCCHGTUNNEL) {
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ t->parms.i_flags = p.i_flags;
+ t->parms.o_flags = p.o_flags;
+
+ if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
+ ipgre_link_update(dev, true);
+ }
+
p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
return -EFAULT;
+
return 0;
}
@@ -1307,9 +1344,9 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
__u32 fwmark = t->fwmark;
+ struct ip_tunnel_parm p;
int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
@@ -1322,7 +1359,18 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
return err;
- return ip_tunnel_changelink(dev, tb, &p, fwmark);
+
+ err = ip_tunnel_changelink(dev, tb, &p, fwmark);
+ if (err < 0)
+ return err;
+
+ t->parms.i_flags = p.i_flags;
+ t->parms.o_flags = p.o_flags;
+
+ if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
+ ipgre_link_update(dev, !tb[IFLA_MTU]);
+
+ return 0;
}
static size_t ipgre_get_size(const struct net_device *dev)
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 9e2770fd00be..f88221aebc9d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -634,6 +634,25 @@ static void get_counters(const struct xt_table_info *t,
}
}
+static void get_old_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
+{
+ struct arpt_entry *iter;
+ unsigned int cpu, i;
+
+ for_each_possible_cpu(cpu) {
+ i = 0;
+ xt_entry_foreach(iter, t->entries, t->size) {
+ struct xt_counters *tmp;
+
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
+ ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+ ++i;
+ }
+ cond_resched();
+ }
+}
+
static struct xt_counters *alloc_counters(const struct xt_table *table)
{
unsigned int countersize;
@@ -910,8 +929,7 @@ static int __do_replace(struct net *net, const char *name,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters, and synchronize with replace */
- get_counters(oldinfo, counters);
+ get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 39286e543ee6..4cbe5e80f3bf 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -781,6 +781,26 @@ get_counters(const struct xt_table_info *t,
}
}
+static void get_old_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
+{
+ struct ipt_entry *iter;
+ unsigned int cpu, i;
+
+ for_each_possible_cpu(cpu) {
+ i = 0;
+ xt_entry_foreach(iter, t->entries, t->size) {
+ const struct xt_counters *tmp;
+
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
+ ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+ ++i; /* macro does multi eval of i */
+ }
+
+ cond_resched();
+ }
+}
+
static struct xt_counters *alloc_counters(const struct xt_table *table)
{
unsigned int countersize;
@@ -1070,8 +1090,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters, and synchronize with replace */
- get_counters(oldinfo, counters);
+ get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index fe374da4bc13..89af9d88ca21 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -344,7 +344,7 @@ static void ipv4_hooks_unregister(struct net *net)
mutex_unlock(&register_ipv4_hooks);
}
-struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
+const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
.l3proto = PF_INET,
.pkt_to_tuple = ipv4_pkt_to_tuple,
.invert_tuple = ipv4_invert_tuple,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index a046c298413a..1849fedd9b81 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -81,7 +81,6 @@ static int icmp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeout)
{
/* Do not immediately delete the connection after the first
@@ -165,6 +164,12 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
return NF_ACCEPT;
}
+static void icmp_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
+}
+
/* Small and modified version of icmp_rcv */
static int
icmp_error(struct net *net, struct nf_conn *tmpl,
@@ -177,18 +182,14 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
/* Not enough header? */
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
- if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
- NULL, "nf_ct_icmp: short packet ");
+ icmp_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
/* See ip_conntrack_proto_tcp.c */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
- if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmp: bad HW ICMP checksum ");
+ icmp_error_log(skb, net, pf, "bad hw icmp checksum");
return -NF_ACCEPT;
}
@@ -199,9 +200,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
* discarded.
*/
if (icmph->type > NR_ICMP_TYPES) {
- if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmp: invalid ICMP type ");
+ icmp_error_log(skb, net, pf, "invalid icmp type");
return -NF_ACCEPT;
}
@@ -259,9 +258,14 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
-static int icmp_nlattr_tuple_size(void)
+static unsigned int icmp_nlattr_tuple_size(void)
{
- return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
}
#endif
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 127153f1ed8a..9f37c4727861 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -212,7 +212,6 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY),
SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY),
SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING),
- SNMP_MIB_ITEM("TCPFACKReorder", LINUX_MIB_TCPFACKREORDER),
SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER),
SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER),
SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER),
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a82b44038308..ef0ff3357a44 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -441,22 +441,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_doulongvec_minmax,
},
{
- .procname = "tcp_wmem",
- .data = &sysctl_tcp_wmem,
- .maxlen = sizeof(sysctl_tcp_wmem),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- },
- {
- .procname = "tcp_rmem",
- .data = &sysctl_tcp_rmem,
- .maxlen = sizeof(sysctl_tcp_rmem),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- },
- {
.procname = "tcp_low_latency",
.data = &sysctl_tcp_low_latency,
.maxlen = sizeof(int),
@@ -1164,6 +1148,22 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = &zero,
.extra2 = &thousand,
},
+ {
+ .procname = "tcp_wmem",
+ .data = &init_net.ipv4.sysctl_tcp_wmem,
+ .maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ },
+ {
+ .procname = "tcp_rmem",
+ .data = &init_net.ipv4.sysctl_tcp_rmem,
+ .maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c4cb19ed4628..bf97317e6c97 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -289,12 +289,7 @@ struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
long sysctl_tcp_mem[3] __read_mostly;
-int sysctl_tcp_wmem[3] __read_mostly;
-int sysctl_tcp_rmem[3] __read_mostly;
-
EXPORT_SYMBOL(sysctl_tcp_mem);
-EXPORT_SYMBOL(sysctl_tcp_rmem);
-EXPORT_SYMBOL(sysctl_tcp_wmem);
atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
@@ -456,8 +451,8 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_sync_mss = tcp_sync_mss;
- sk->sk_sndbuf = sysctl_tcp_wmem[1];
- sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+ sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
+ sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk_sockets_allocated_inc(sk);
}
@@ -2514,8 +2509,6 @@ static int tcp_repair_options_est(struct sock *sk,
return -EINVAL;
tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
- if (sock_net(sk)->ipv4.sysctl_tcp_fack)
- tcp_enable_fack(tp);
break;
case TCPOPT_TIMESTAMP:
if (opt.opt_val != 0)
@@ -2984,7 +2977,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_lost = tp->lost_out;
info->tcpi_retrans = tp->retrans_out;
- info->tcpi_fackets = tp->fackets_out;
now = tcp_jiffies32;
info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
@@ -3636,13 +3628,13 @@ void __init tcp_init(void)
max_wshare = min(4UL*1024*1024, limit);
max_rshare = min(6UL*1024*1024, limit);
- sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
- sysctl_tcp_wmem[1] = 16*1024;
- sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
+ init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
+ init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
+ init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
- sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
- sysctl_tcp_rmem[1] = 87380;
- sysctl_tcp_rmem[2] = max(87380, max_rshare);
+ init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
+ init_net.ipv4.sysctl_tcp_rmem[1] = 87380;
+ init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare);
pr_info("Hash tables configured (established %u bind %u)\n",
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0ada8bfc2ebd..c3447c5512fd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -100,7 +100,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
-#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
+#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
@@ -320,7 +320,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
sndmem *= nr_segs * per_mss;
if (sk->sk_sndbuf < sndmem)
- sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+ sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -354,7 +354,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
- int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1;
+ int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
@@ -409,7 +409,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
rcvmem <<= 2;
if (sk->sk_rcvbuf < rcvmem)
- sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
+ sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
}
/* 4. Try to fixup all. It is made immediately after connection enters
@@ -457,15 +457,16 @@ static void tcp_clamp_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
icsk->icsk_ack.quick = 0;
- if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+ if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
- sysctl_tcp_rmem[2]);
+ net->ipv4.sysctl_tcp_rmem[2]);
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -623,7 +624,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
rcvmem += 128;
- rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+ rcvbuf = min(rcvwin / tp->advmss * rcvmem,
+ sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (rcvbuf > sk->sk_rcvbuf) {
sk->sk_rcvbuf = rcvbuf;
@@ -840,18 +842,6 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}
-/*
- * Packet counting of FACK is based on in-order assumptions, therefore TCP
- * disables it when reordering is detected
- */
-void tcp_disable_fack(struct tcp_sock *tp)
-{
- /* RFC3517 uses different metric in lost marker => reset on change */
- if (tcp_is_fack(tp))
- tp->lost_skb_hint = NULL;
- tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
-}
-
/* Take a notice that peer is sending D-SACKs */
static void tcp_dsack_seen(struct tcp_sock *tp)
{
@@ -859,42 +849,39 @@ static void tcp_dsack_seen(struct tcp_sock *tp)
tp->rack.dsack_seen = 1;
}
-static void tcp_update_reordering(struct sock *sk, const int metric,
- const int ts)
+/* It's reordering when higher sequence was delivered (i.e. sacked) before
+ * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
+ * distance is approximated in full-mss packet distance ("reordering").
+ */
+static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
+ const int ts)
{
struct tcp_sock *tp = tcp_sk(sk);
- int mib_idx;
+ const u32 mss = tp->mss_cache;
+ u32 fack, metric;
- if (WARN_ON_ONCE(metric < 0))
+ fack = tcp_highest_sack_seq(tp);
+ if (!before(low_seq, fack))
return;
- if (metric > tp->reordering) {
- tp->reordering = min(sock_net(sk)->ipv4.sysctl_tcp_max_reordering, metric);
-
+ metric = fack - low_seq;
+ if ((metric > tp->reordering * mss) && mss) {
#if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
tp->reordering,
- tp->fackets_out,
+ 0,
tp->sacked_out,
tp->undo_marker ? tp->undo_retrans : 0);
#endif
- tcp_disable_fack(tp);
+ tp->reordering = min_t(u32, (metric + mss - 1) / mss,
+ sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
}
tp->rack.reord = 1;
-
/* This exciting event is worth to be remembered. 8) */
- if (ts)
- mib_idx = LINUX_MIB_TCPTSREORDER;
- else if (tcp_is_reno(tp))
- mib_idx = LINUX_MIB_TCPRENOREORDER;
- else if (tcp_is_fack(tp))
- mib_idx = LINUX_MIB_TCPFACKREORDER;
- else
- mib_idx = LINUX_MIB_TCPSACKREORDER;
-
- NET_INC_STATS(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk),
+ ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
}
/* This must be called before lost_out is incremented */
@@ -968,7 +955,6 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
* 3. Loss detection event of two flavors:
* A. Scoreboard estimator decided the packet is lost.
* A'. Reno "three dupacks" marks head of queue lost.
- * A''. Its FACK modification, head until snd.fack is lost.
* B. SACK arrives sacking SND.NXT at the moment, when the
* segment was retransmitted.
* 4. D-SACK added new rule: D-SACK changes any tag to S.
@@ -1111,8 +1097,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
}
struct tcp_sacktag_state {
- int reord;
- int fack_count;
+ u32 reord;
/* Timestamps for earliest and latest never-retransmitted segment
* that was SACKed. RTO needs the earliest RTT to stay conservative,
* but congestion control should still get an accurate delay signal.
@@ -1188,15 +1173,15 @@ static u8 tcp_sacktag_one(struct sock *sk,
u64 xmit_time)
{
struct tcp_sock *tp = tcp_sk(sk);
- int fack_count = state->fack_count;
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
tp->undo_retrans--;
- if (sacked & TCPCB_SACKED_ACKED)
- state->reord = min(fack_count, state->reord);
+ if ((sacked & TCPCB_SACKED_ACKED) &&
+ before(start_seq, state->reord))
+ state->reord = start_seq;
}
/* Nothing to do; acked frame is about to be dropped (was ACKed). */
@@ -1222,9 +1207,10 @@ static u8 tcp_sacktag_one(struct sock *sk,
* which was in hole. It is reordering.
*/
if (before(start_seq,
- tcp_highest_sack_seq(tp)))
- state->reord = min(fack_count,
- state->reord);
+ tcp_highest_sack_seq(tp)) &&
+ before(start_seq, state->reord))
+ state->reord = start_seq;
+
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
if (state->first_sackt == 0)
@@ -1243,15 +1229,10 @@ static u8 tcp_sacktag_one(struct sock *sk,
tp->sacked_out += pcount;
tp->delivered += pcount; /* Out-of-order packets delivered */
- fack_count += pcount;
-
/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
- if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
+ if (tp->lost_skb_hint &&
before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
tp->lost_cnt_hint += pcount;
-
- if (fack_count > tp->fackets_out)
- tp->fackets_out = fack_count;
}
/* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1498,7 +1479,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
}
out:
- state->fack_count += pcount;
return prev;
noop:
@@ -1577,8 +1557,6 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
tcp_highest_sack_seq(tp)))
tcp_advance_highest_sack(sk, skb);
}
-
- state->fack_count += tcp_skb_pcount(skb);
}
return skb;
}
@@ -1589,7 +1567,6 @@ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
{
struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
struct sk_buff *skb;
- int unack_bytes;
while (*p) {
parent = *p;
@@ -1602,12 +1579,6 @@ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
p = &parent->rb_right;
continue;
}
-
- state->fack_count = 0;
- unack_bytes = TCP_SKB_CB(skb)->seq - tcp_sk(sk)->snd_una;
- if (state->mss_now && unack_bytes > 0)
- state->fack_count = unack_bytes / state->mss_now;
-
return skb;
}
return NULL;
@@ -1665,13 +1636,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
int first_sack_index;
state->flag = 0;
- state->reord = tp->packets_out;
+ state->reord = tp->snd_nxt;
- if (!tp->sacked_out) {
- if (WARN_ON(tp->fackets_out))
- tp->fackets_out = 0;
+ if (!tp->sacked_out)
tcp_highest_sack_reset(sk);
- }
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
@@ -1743,7 +1711,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
}
state->mss_now = tcp_current_mss(sk);
- state->fack_count = 0;
skb = NULL;
i = 0;
@@ -1801,7 +1768,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state->fack_count = tp->fackets_out;
cache++;
goto walk;
}
@@ -1816,7 +1782,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state->fack_count = tp->fackets_out;
}
skb = tcp_sacktag_skip(skb, sk, state, start_seq);
@@ -1836,9 +1801,8 @@ advance_sp:
for (j = 0; j < used_sacks; j++)
tp->recv_sack_cache[i++] = sp[j];
- if ((state->reord < tp->fackets_out) &&
- ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
- tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
+ if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
+ tcp_check_sack_reordering(sk, state->reord, 0);
tcp_verify_left_out(tp);
out:
@@ -1876,8 +1840,13 @@ static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_limit_reno_sacked(tp))
- tcp_update_reordering(sk, tp->packets_out + addend, 0);
+
+ if (!tcp_limit_reno_sacked(tp))
+ return;
+
+ tp->reordering = min_t(u32, tp->packets_out + addend,
+ sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
/* Emulate SACKs for SACKless connection: account for a new dupack. */
@@ -1923,7 +1892,6 @@ void tcp_clear_retrans(struct tcp_sock *tp)
tp->lost_out = 0;
tp->undo_marker = 0;
tp->undo_retrans = -1;
- tp->fackets_out = 0;
tp->sacked_out = 0;
}
@@ -1973,7 +1941,6 @@ void tcp_enter_loss(struct sock *sk)
if (is_reneg) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
- tp->fackets_out = 0;
}
tcp_clear_all_retrans_hints(tp);
@@ -2040,19 +2007,10 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag)
return false;
}
-static inline int tcp_fackets_out(const struct tcp_sock *tp)
-{
- return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
-}
-
/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
* counter when SACK is enabled (without SACK, sacked_out is used for
* that purpose).
*
- * Instead, with FACK TCP uses fackets_out that includes both SACKed
- * segments up to the highest received SACK block so far and holes in
- * between them.
- *
* With reordering, holes may still be in flight, so RFC3517 recovery
* uses pure sacked_out (total number of SACKed segments) even though
* it violates the RFC that uses duplicate ACKs, often these are equal
@@ -2062,10 +2020,10 @@ static inline int tcp_fackets_out(const struct tcp_sock *tp)
*/
static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
{
- return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
+ return tp->sacked_out + 1;
}
-/* Linux NewReno/SACK/FACK/ECN state machine.
+/* Linux NewReno/SACK/ECN state machine.
* --------------------------------------
*
* "Open" Normal state, no dubious events, fast path.
@@ -2130,16 +2088,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
* dynamically measured and adjusted. This is implemented in
* tcp_rack_mark_lost.
*
- * FACK (Disabled by default. Subsumbed by RACK):
- * It is the simplest heuristics. As soon as we decided
- * that something is lost, we decide that _all_ not SACKed
- * packets until the most forward SACK are lost. I.e.
- * lost_out = fackets_out - sacked_out and left_out = fackets_out.
- * It is absolutely correct estimate, if network does not reorder
- * packets. And it loses any connection to reality when reordering
- * takes place. We use FACK by default until reordering
- * is suspected on the path to this destination.
- *
* If the receiver does not support SACK:
*
* NewReno (RFC6582): in Recovery we assume that one segment
@@ -2188,7 +2136,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
}
/* Detect loss in event "A" above by marking head of queue up as lost.
- * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * For non-SACK(Reno) senders, the first "packets" number of segments
* are considered lost. For RFC3517 SACK, a segment is considered lost if it
* has at least tp->reordering SACKed seqments above it; "packets" refers to
* the maximum SACKed segments to pass before reaching this limit.
@@ -2224,12 +2172,12 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
break;
oldcnt = cnt;
- if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
+ if (tcp_is_reno(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
cnt += tcp_skb_pcount(skb);
if (cnt > packets) {
- if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+ if (tcp_is_sack(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
(oldcnt >= packets))
break;
@@ -2260,11 +2208,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
if (tcp_is_reno(tp)) {
tcp_mark_head_lost(sk, 1, 1);
- } else if (tcp_is_fack(tp)) {
- int lost = tp->fackets_out - tp->reordering;
- if (lost <= 0)
- lost = 1;
- tcp_mark_head_lost(sk, lost, 0);
} else {
int sacked_upto = tp->sacked_out - tp->reordering;
if (sacked_upto >= 0)
@@ -2611,7 +2554,6 @@ void tcp_simple_retransmit(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
unsigned int mss = tcp_current_mss(sk);
- u32 prior_lost = tp->lost_out;
skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
if (tcp_skb_seglen(skb) > mss &&
@@ -2626,7 +2568,7 @@ void tcp_simple_retransmit(struct sock *sk)
tcp_clear_retrans_hints_partial(tp);
- if (prior_lost == tp->lost_out)
+ if (!tp->lost_out)
return;
if (tcp_is_reno(tp))
@@ -2734,15 +2676,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
}
/* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, const int acked)
+static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tp->undo_marker && tcp_packet_delayed(tp)) {
/* Plain luck! Hole if filled with delayed
- * packet, rather than with a retransmit.
+ * packet, rather than with a retransmit. Check reordering.
*/
- tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+ tcp_check_sack_reordering(sk, prior_snd_una, 1);
/* We are getting evidence that the reordering degree is higher
* than we realized. If there are no retransmits out then we
@@ -2778,6 +2720,14 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
}
}
+static bool tcp_force_fast_retransmit(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ return after(tcp_highest_sack_seq(tp),
+ tp->snd_una + tp->reordering * tp->mss_cache);
+}
+
/* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out,
* taking into account both packets sitting in receiver's buffer and
@@ -2790,19 +2740,17 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
* It does _not_ decide what to send, it is made in function
* tcp_xmit_retransmit_queue().
*/
-static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
bool is_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int fast_rexmit = 0, flag = *ack_flag;
bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
- (tcp_fackets_out(tp) > tp->reordering));
+ tcp_force_fast_retransmit(sk));
if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
- if (!tp->sacked_out && tp->fackets_out)
- tp->fackets_out = 0;
/* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */
@@ -2849,11 +2797,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (tcp_is_reno(tp) && is_dupack)
tcp_add_reno_sack(sk);
} else {
- if (tcp_try_undo_partial(sk, acked))
+ if (tcp_try_undo_partial(sk, prior_snd_una))
return;
/* Partial ACK arrived. Force fast retransmit. */
do_lost = tcp_is_reno(tp) ||
- tcp_fackets_out(tp) > tp->reordering;
+ tcp_force_fast_retransmit(sk);
}
if (tcp_try_undo_dsack(sk)) {
tcp_try_keep_open(sk);
@@ -3063,15 +3011,15 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
* is before the ack sequence we can discard it as it's confirmed to have
* arrived at the other end.
*/
-static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una, int *acked,
+static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
+ u32 prior_snd_una,
struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
u64 first_ackt, last_ackt;
struct tcp_sock *tp = tcp_sk(sk);
u32 prior_sacked = tp->sacked_out;
- u32 reord = tp->packets_out;
+ u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
struct sk_buff *skb, *next;
bool fully_acked = true;
long sack_rtt_us = -1L;
@@ -3086,6 +3034,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ const u32 start_seq = scb->seq;
u8 sacked = scb->sacked;
u32 acked_pcount;
@@ -3116,7 +3065,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
first_ackt = last_ackt;
last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
- reord = min(pkts_acked, reord);
+ if (before(start_seq, reord))
+ reord = start_seq;
if (!after(scb->end_seq, tp->high_seq))
flag |= FLAG_ORIG_SACK_ACKED;
}
@@ -3194,16 +3144,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
int delta;
/* Non-retransmitted hole got filled? That's reordering */
- if (reord < prior_fackets && reord <= tp->fackets_out)
- tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+ if (before(reord, prior_fack))
+ tcp_check_sack_reordering(sk, reord, 0);
- delta = tcp_is_fack(tp) ? pkts_acked :
- prior_sacked - tp->sacked_out;
+ delta = prior_sacked - tp->sacked_out;
tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
}
-
- tp->fackets_out -= min(pkts_acked, tp->fackets_out);
-
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3244,7 +3190,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
}
}
#endif
- *acked = pkts_acked;
return flag;
}
@@ -3553,12 +3498,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
- u32 prior_fackets;
int prior_packets = tp->packets_out;
u32 delivered = tp->delivered;
u32 lost = tp->lost;
- int acked = 0; /* Number of packets newly acked */
int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+ u32 prior_fack;
sack_state.first_sackt = 0;
sack_state.rate = &rs;
@@ -3590,7 +3534,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
icsk->icsk_retransmits = 0;
}
- prior_fackets = tp->fackets_out;
+ prior_fack = tcp_highest_sack_seq(tp);
rs.prior_in_flight = tcp_packets_in_flight(tp);
/* ts_recent update must be made after we are sure that the packet
@@ -3646,8 +3590,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
- &sack_state);
+ flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
tcp_rack_update_reo_wnd(sk, &rs);
@@ -3659,7 +3602,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
}
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
@@ -3675,7 +3619,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
@@ -3697,7 +3642,8 @@ old_ack:
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
tcp_xmit_recovery(sk, rexmit);
}
@@ -5706,9 +5652,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tp->tcp_header_len = sizeof(struct tcphdr);
}
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack)
- tcp_enable_fack(tp);
-
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0162c577bb9c..1eac84b8044e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2409,8 +2409,8 @@ struct proto tcp_prot = {
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.sysctl_mem = sysctl_tcp_mem,
- .sysctl_wmem = sysctl_tcp_wmem,
- .sysctl_rmem = sysctl_tcp_rmem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
@@ -2509,7 +2509,14 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
-
+ if (net != &init_net) {
+ memcpy(net->ipv4.sysctl_tcp_rmem,
+ init_net.ipv4.sysctl_tcp_rmem,
+ sizeof(init_net.ipv4.sysctl_tcp_rmem));
+ memcpy(net->ipv4.sysctl_tcp_wmem,
+ init_net.ipv4.sysctl_tcp_wmem,
+ sizeof(init_net.ipv4.sysctl_tcp_wmem));
+ }
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 9d5ddebfd831..7097f92d16e5 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -470,10 +470,8 @@ void tcp_init_metrics(struct sock *sk)
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
}
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
- if (val && tp->reordering != val) {
- tcp_disable_fack(tp);
+ if (val && tp->reordering != val)
tp->reordering = val;
- }
crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
rcu_read_unlock();
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4bb86580decd..e36eff0403f4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -475,7 +475,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->packets_out = 0;
newtp->retrans_out = 0;
newtp->sacked_out = 0;
- newtp->fackets_out = 0;
newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
newtp->tlp_high_seq = 0;
newtp->lsndtime = tcp_jiffies32;
@@ -509,10 +508,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
keepalive_time_when(newtp));
newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
- if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
- if (sock_net(sk)->ipv4.sysctl_tcp_fack)
- tcp_enable_fack(newtp);
- }
+ newtp->rx_opt.sack_ok = ireq->sack_ok;
newtp->window_clamp = req->rsk_window_clamp;
newtp->rcv_ssthresh = req->rsk_rcv_wnd;
newtp->rcv_wnd = req->rsk_rcv_wnd;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 11f69bbf9307..b6a2aa1dcf56 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -149,11 +149,19 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
* is freed by GSO engine
*/
if (copy_destructor) {
+ int delta;
+
swap(gso_skb->sk, skb->sk);
swap(gso_skb->destructor, skb->destructor);
sum_truesize += skb->truesize;
- refcount_add(sum_truesize - gso_skb->truesize,
- &skb->sk->sk_wmem_alloc);
+ delta = sum_truesize - gso_skb->truesize;
+ /* In some pathological cases, delta can be negative.
+ * We need to either use refcount_add() or refcount_sub_and_test()
+ */
+ if (likely(delta >= 0))
+ refcount_add(delta, &skb->sk->sk_wmem_alloc);
+ else
+ WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc));
}
delta = htonl(oldlen + (skb_tail_pointer(skb) -
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a9d917e4dad5..0256f7a41041 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -220,7 +220,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
(*rcv_wscale) = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
- space = max_t(u32, space, sysctl_tcp_rmem[2]);
+ space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
@@ -1218,21 +1218,6 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
}
}
-/* When a modification to fackets out becomes necessary, we need to check
- * skb is counted to fackets_out or not.
- */
-static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
- int decr)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (!tp->sacked_out || tcp_is_reno(tp))
- return;
-
- if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
- tp->fackets_out -= decr;
-}
-
/* Pcount in the middle of the write queue got changed, we need to do various
* tweaks to fix counters
*/
@@ -1253,11 +1238,9 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
if (tcp_is_reno(tp) && decr > 0)
tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
- tcp_adjust_fackets_out(sk, skb, decr);
-
if (tp->lost_skb_hint &&
before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
- (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
+ (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
tp->lost_cnt_hint -= decr;
tcp_verify_left_out(tp);
@@ -2961,9 +2944,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
* retransmitted data is acknowledged. It tries to continue
* resending the rest of the retransmit queue, until either
* we've sent it all or the congestion window limit is reached.
- * If doing SACK, the first ACK which comes back for a timeout
- * based retransmit packet might feed us FACK information again.
- * If so, we use it to avoid unnecessarily retransmissions.
*/
void tcp_xmit_retransmit_queue(struct sock *sk)
{
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 66d8c3d912fd..a6dffd65eb9d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1267,7 +1267,9 @@ out:
in6_ifa_put(ifp);
}
-static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
+static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
+ struct inet6_ifaddr *ift,
+ bool block)
{
struct inet6_dev *idev = ifp->idev;
struct in6_addr addr, *tmpaddr;
@@ -1371,7 +1373,7 @@ retry:
ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
ipv6_addr_scope(&addr), addr_flags,
- tmp_valid_lft, tmp_prefered_lft, true, NULL);
+ tmp_valid_lft, tmp_prefered_lft, block, NULL);
if (IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
@@ -1956,7 +1958,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
if (ifpub) {
in6_ifa_hold(ifpub);
spin_unlock_bh(&ifp->lock);
- ipv6_create_tempaddr(ifpub, ifp);
+ ipv6_create_tempaddr(ifpub, ifp, true);
in6_ifa_put(ifpub);
} else {
spin_unlock_bh(&ifp->lock);
@@ -2456,7 +2458,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
* no temporary address currently exists.
*/
read_unlock_bh(&idev->lock);
- ipv6_create_tempaddr(ifp, NULL);
+ ipv6_create_tempaddr(ifp, NULL, false);
} else {
read_unlock_bh(&idev->lock);
}
@@ -4351,7 +4353,7 @@ restart:
spin_lock(&ifpub->lock);
ifpub->regen_count = 0;
spin_unlock(&ifpub->lock);
- ipv6_create_tempaddr(ifpub, ifp);
+ ipv6_create_tempaddr(ifpub, ifp, true);
in6_ifa_put(ifpub);
in6_ifa_put(ifp);
goto restart;
@@ -5057,6 +5059,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
+ array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
}
static inline size_t inet6_ifla6_size(void)
@@ -5984,6 +5987,7 @@ int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
}
static int minus_one = -1;
+static const int zero = 0;
static const int one = 1;
static const int two_five_five = 255;
@@ -6355,6 +6359,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = addrconf_sysctl_disable_policy,
},
{
+ .procname = "ndisc_tclass",
+ .data = &ipv6_devconf.ndisc_tclass,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&zero,
+ .extra2 = (void *)&two_five_five,
+ },
+ {
/* sentinel */
}
};
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index e0170f62bc39..3c7a11b62334 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -55,17 +55,6 @@ struct ila_identifier {
};
};
-enum {
- ILA_ATYPE_IID = 0,
- ILA_ATYPE_LUID,
- ILA_ATYPE_VIRT_V4,
- ILA_ATYPE_VIRT_UNI_V6,
- ILA_ATYPE_VIRT_MULTI_V6,
- ILA_ATYPE_RSVD_1,
- ILA_ATYPE_RSVD_2,
- ILA_ATYPE_RSVD_3,
-};
-
#define CSUM_NEUTRAL_FLAG htonl(0x10000000)
struct ila_addr {
@@ -93,6 +82,7 @@ struct ila_params {
struct ila_locator locator_match;
__wsum csum_diff;
u8 csum_mode;
+ u8 ident_type;
};
static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index aba0998ddbfb..8c88ecf29b93 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -13,30 +13,37 @@
#include <uapi/linux/ila.h>
#include "ila.h"
-static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+void ila_init_saved_csum(struct ila_params *p)
{
- struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+ if (!p->locator_match.v64)
+ return;
+ p->csum_diff = compute_csum_diff8(
+ (__be32 *)&p->locator,
+ (__be32 *)&p->locator_match);
+}
+
+static __wsum get_csum_diff_iaddr(struct ila_addr *iaddr, struct ila_params *p)
+{
if (p->locator_match.v64)
return p->csum_diff;
else
- return compute_csum_diff8((__be32 *)&iaddr->loc,
- (__be32 *)&p->locator);
+ return compute_csum_diff8((__be32 *)&p->locator,
+ (__be32 *)&iaddr->loc);
}
-static void ila_csum_do_neutral(struct ila_addr *iaddr,
- struct ila_params *p)
+static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+{
+ return get_csum_diff_iaddr(ila_a2i(&ip6h->daddr), p);
+}
+
+static void ila_csum_do_neutral_fmt(struct ila_addr *iaddr,
+ struct ila_params *p)
{
__sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
__wsum diff, fval;
- /* Check if checksum adjust value has been cached */
- if (p->locator_match.v64) {
- diff = p->csum_diff;
- } else {
- diff = compute_csum_diff8((__be32 *)&p->locator,
- (__be32 *)iaddr);
- }
+ diff = get_csum_diff_iaddr(iaddr, p);
fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG);
@@ -53,13 +60,23 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr,
iaddr->ident.csum_neutral ^= 1;
}
-static void ila_csum_adjust_transport(struct sk_buff *skb,
+static void ila_csum_do_neutral_nofmt(struct ila_addr *iaddr,
struct ila_params *p)
{
+ __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
__wsum diff;
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
- struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+
+ diff = get_csum_diff_iaddr(iaddr, p);
+
+ *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust)));
+}
+
+static void ila_csum_adjust_transport(struct sk_buff *skb,
+ struct ila_params *p)
+{
size_t nhoff = sizeof(struct ipv6hdr);
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ __wsum diff;
switch (ip6h->nexthdr) {
case NEXTHDR_TCP:
@@ -98,52 +115,45 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
}
break;
}
-
- /* Now change destination address */
- iaddr->loc = p->locator;
}
void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
- bool set_csum_neutral)
+ bool sir2ila)
{
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
- /* First deal with the transport checksum */
- if (ila_csum_neutral_set(iaddr->ident)) {
- /* C-bit is set in the locator indicating that this
- * is a locator being translated to a SIR address.
- * Perform (receiver) checksum-neutral translation.
- */
- if (!set_csum_neutral)
- ila_csum_do_neutral(iaddr, p);
- } else {
- switch (p->csum_mode) {
- case ILA_CSUM_ADJUST_TRANSPORT:
- ila_csum_adjust_transport(skb, p);
- break;
- case ILA_CSUM_NEUTRAL_MAP:
- ila_csum_do_neutral(iaddr, p);
- break;
- case ILA_CSUM_NO_ACTION:
+ switch (p->csum_mode) {
+ case ILA_CSUM_ADJUST_TRANSPORT:
+ ila_csum_adjust_transport(skb, p);
+ break;
+ case ILA_CSUM_NEUTRAL_MAP:
+ if (sir2ila) {
+ if (WARN_ON(ila_csum_neutral_set(iaddr->ident))) {
+ /* Checksum flag should never be
+ * set in a formatted SIR address.
+ */
+ break;
+ }
+ } else if (!ila_csum_neutral_set(iaddr->ident)) {
+ /* ILA to SIR translation and C-bit isn't
+ * set so we're good.
+ */
break;
}
+ ila_csum_do_neutral_fmt(iaddr, p);
+ break;
+ case ILA_CSUM_NEUTRAL_MAP_AUTO:
+ ila_csum_do_neutral_nofmt(iaddr, p);
+ break;
+ case ILA_CSUM_NO_ACTION:
+ break;
}
/* Now change destination address */
iaddr->loc = p->locator;
}
-void ila_init_saved_csum(struct ila_params *p)
-{
- if (!p->locator_match.v64)
- return;
-
- p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator,
- (__be32 *)&p->locator_match);
-}
-
static int __init ila_init(void)
{
int ret;
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 696281b4bca2..3d56a2fb6f86 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -20,6 +20,7 @@ struct ila_lwt {
struct ila_params p;
struct dst_cache dst_cache;
u32 connected : 1;
+ u32 lwt_output : 1;
};
static inline struct ila_lwt *ila_lwt_lwtunnel(
@@ -45,8 +46,10 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate),
- true);
+ if (ilwt->lwt_output)
+ ila_update_ipv6_locator(skb,
+ ila_params_lwtunnel(orig_dst->lwtstate),
+ true);
if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) {
/* Already have a next hop address in route, no need for
@@ -98,11 +101,15 @@ drop:
static int ila_input(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
+ struct ila_lwt *ilwt = ila_lwt_lwtunnel(dst->lwtstate);
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false);
+ if (!ilwt->lwt_output)
+ ila_update_ipv6_locator(skb,
+ ila_params_lwtunnel(dst->lwtstate),
+ false);
return dst->lwtstate->orig_input(skb);
@@ -114,6 +121,8 @@ drop:
static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
+ [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
+ [ILA_ATTR_HOOK_TYPE] = { .type = NLA_U8, },
};
static int ila_build_state(struct nlattr *nla,
@@ -127,33 +136,84 @@ static int ila_build_state(struct nlattr *nla,
struct lwtunnel_state *newts;
const struct fib6_config *cfg6 = cfg;
struct ila_addr *iaddr;
+ u8 ident_type = ILA_ATYPE_USE_FORMAT;
+ u8 hook_type = ILA_HOOK_ROUTE_OUTPUT;
+ u8 csum_mode = ILA_CSUM_NO_ACTION;
+ bool lwt_output = true;
+ u8 eff_ident_type;
int ret;
if (family != AF_INET6)
return -EINVAL;
- if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
- /* Need to have full locator and at least type field
- * included in destination
- */
+ ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[ILA_ATTR_LOCATOR])
return -EINVAL;
- }
iaddr = (struct ila_addr *)&cfg6->fc_dst;
- if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) {
- /* Don't allow translation for a non-ILA address or checksum
- * neutral flag to be set.
+ if (tb[ILA_ATTR_IDENT_TYPE])
+ ident_type = nla_get_u8(tb[ILA_ATTR_IDENT_TYPE]);
+
+ if (ident_type == ILA_ATYPE_USE_FORMAT) {
+ /* Infer identifier type from type field in formatted
+ * identifier.
*/
+
+ if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
+ /* Need to have full locator and at least type field
+ * included in destination
+ */
+ return -EINVAL;
+ }
+
+ eff_ident_type = iaddr->ident.type;
+ } else {
+ eff_ident_type = ident_type;
+ }
+
+ switch (eff_ident_type) {
+ case ILA_ATYPE_IID:
+ /* Don't allow ILA for IID type */
+ return -EINVAL;
+ case ILA_ATYPE_LUID:
+ break;
+ case ILA_ATYPE_VIRT_V4:
+ case ILA_ATYPE_VIRT_UNI_V6:
+ case ILA_ATYPE_VIRT_MULTI_V6:
+ case ILA_ATYPE_NONLOCAL_ADDR:
+ /* These ILA formats are not supported yet. */
+ default:
return -EINVAL;
}
- ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
- if (ret < 0)
- return ret;
+ if (tb[ILA_ATTR_HOOK_TYPE])
+ hook_type = nla_get_u8(tb[ILA_ATTR_HOOK_TYPE]);
+
+ switch (hook_type) {
+ case ILA_HOOK_ROUTE_OUTPUT:
+ lwt_output = true;
+ break;
+ case ILA_HOOK_ROUTE_INPUT:
+ lwt_output = false;
+ break;
+ default:
+ return -EINVAL;
+ }
- if (!tb[ILA_ATTR_LOCATOR])
+ if (tb[ILA_ATTR_CSUM_MODE])
+ csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
+
+ if (csum_mode == ILA_CSUM_NEUTRAL_MAP &&
+ ila_csum_neutral_set(iaddr->ident)) {
+ /* Don't allow translation if checksum neutral bit is
+ * configured and it's set in the SIR address.
+ */
return -EINVAL;
+ }
newts = lwtunnel_state_alloc(sizeof(*ilwt));
if (!newts)
@@ -166,19 +226,18 @@ static int ila_build_state(struct nlattr *nla,
return ret;
}
+ ilwt->lwt_output = !!lwt_output;
+
p = ila_params_lwtunnel(newts);
+ p->csum_mode = csum_mode;
+ p->ident_type = ident_type;
p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
/* Precompute checksum difference for translation since we
* know both the old locator and the new one.
*/
p->locator_match = iaddr->loc;
- p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator_match, (__be32 *)&p->locator);
-
- if (tb[ILA_ATTR_CSUM_MODE])
- p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
ila_init_saved_csum(p);
@@ -203,13 +262,23 @@ static int ila_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
{
struct ila_params *p = ila_params_lwtunnel(lwtstate);
+ struct ila_lwt *ilwt = ila_lwt_lwtunnel(lwtstate);
if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64,
ILA_ATTR_PAD))
goto nla_put_failure;
+
if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode))
goto nla_put_failure;
+ if (nla_put_u8(skb, ILA_ATTR_IDENT_TYPE, (__force u8)p->ident_type))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, ILA_ATTR_HOOK_TYPE,
+ ilwt->lwt_output ? ILA_HOOK_ROUTE_OUTPUT :
+ ILA_HOOK_ROUTE_INPUT))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -220,6 +289,8 @@ static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
{
return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */
nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */
+ nla_total_size(sizeof(u8)) + /* ILA_ATTR_IDENT_TYPE */
+ nla_total_size(sizeof(u8)) + /* ILA_ATTR_HOOK_TYPE */
0;
}
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 3123b9de91b5..6eb5e68f112a 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -121,6 +121,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
+ [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
};
static int parse_nl_config(struct genl_info *info,
@@ -138,6 +139,14 @@ static int parse_nl_config(struct genl_info *info,
if (info->attrs[ILA_ATTR_CSUM_MODE])
xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
+ else
+ xp->ip.csum_mode = ILA_CSUM_NO_ACTION;
+
+ if (info->attrs[ILA_ATTR_IDENT_TYPE])
+ xp->ip.ident_type = nla_get_u8(
+ info->attrs[ILA_ATTR_IDENT_TYPE]);
+ else
+ xp->ip.ident_type = ILA_ATYPE_USE_FORMAT;
if (info->attrs[ILA_ATTR_IFINDEX])
xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
@@ -198,7 +207,7 @@ static void ila_free_cb(void *ptr, void *arg)
}
}
-static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral);
+static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila);
static unsigned int
ila_nf_input(void *priv,
@@ -396,7 +405,8 @@ static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
(__force u64)ila->xp.ip.locator_match.v64,
ILA_ATTR_PAD) ||
nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
- nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode))
+ nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode) ||
+ nla_put_u8(msg, ILA_ATTR_IDENT_TYPE, ila->xp.ip.ident_type))
return -1;
return 0;
@@ -607,7 +617,7 @@ static struct pernet_operations ila_net_ops = {
.size = sizeof(struct ila_net),
};
-static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
+static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
{
struct ila_map *ila;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -617,16 +627,16 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
/* Assumes skb contains a valid IPv6 header that is pulled */
- if (!ila_addr_is_ila(iaddr)) {
- /* Type indicates this is not an ILA address */
- return 0;
- }
+ /* No check here that ILA type in the mapping matches what is in the
+ * address. We assume that whatever sender gaves us can be translated.
+ * The checksum mode however is relevant.
+ */
rcu_read_lock();
ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
if (ila)
- ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral);
+ ila_update_ipv6_locator(skb, &ila->xp.ip, sir2ila);
rcu_read_unlock();
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 59fad81e5f7a..9c24b85949c1 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1617,6 +1617,10 @@ int ip6mr_sk_done(struct sock *sk)
struct net *net = sock_net(sk);
struct mr6_table *mrt;
+ if (sk->sk_type != SOCK_RAW ||
+ inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+ return err;
+
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
if (sk == mrt->mroute6_sk) {
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f9c3ffe04382..b3cea200c85e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -427,12 +427,19 @@ static void ip6_nd_hdr(struct sk_buff *skb,
int hop_limit, int len)
{
struct ipv6hdr *hdr;
+ struct inet6_dev *idev;
+ unsigned tclass;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(skb->dev);
+ tclass = idev ? idev->cnf.ndisc_tclass : 0;
+ rcu_read_unlock();
skb_push(skb, sizeof(*hdr));
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
- ip6_flow_hdr(hdr, 0, 0);
+ ip6_flow_hdr(hdr, tclass, 0);
hdr->payload_len = htons(len);
hdr->nexthdr = IPPROTO_ICMPV6;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 01bd3ee5ebc6..f06e25065a34 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -800,6 +800,25 @@ get_counters(const struct xt_table_info *t,
}
}
+static void get_old_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
+{
+ struct ip6t_entry *iter;
+ unsigned int cpu, i;
+
+ for_each_possible_cpu(cpu) {
+ i = 0;
+ xt_entry_foreach(iter, t->entries, t->size) {
+ const struct xt_counters *tmp;
+
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
+ ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+ ++i;
+ }
+ cond_resched();
+ }
+}
+
static struct xt_counters *alloc_counters(const struct xt_table *table)
{
unsigned int countersize;
@@ -1090,8 +1109,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters, and synchronize with replace */
- get_counters(oldinfo, counters);
+ get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index fe01dc953c56..3b80a38f62b8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -339,7 +339,7 @@ static void ipv6_hooks_unregister(struct net *net)
mutex_unlock(&register_ipv6_hooks);
}
-struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
+const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
.l3proto = PF_INET6,
.pkt_to_tuple = ipv6_pkt_to_tuple,
.invert_tuple = ipv6_invert_tuple,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index a9e1fd1a8536..3ac0d826afc4 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -94,7 +94,6 @@ static int icmpv6_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeout)
{
/* Do not immediately delete the connection after the first
@@ -176,6 +175,12 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
return NF_ACCEPT;
}
+static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
+}
+
static int
icmpv6_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb, unsigned int dataoff,
@@ -187,17 +192,13 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmp6h == NULL) {
- if (LOG_INVALID(net, IPPROTO_ICMPV6))
- nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmpv6: short packet ");
+ icmpv6_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
- if (LOG_INVALID(net, IPPROTO_ICMPV6))
- nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmpv6: ICMPv6 checksum failed ");
+ icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
return -NF_ACCEPT;
}
@@ -258,9 +259,14 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
-static int icmpv6_nlattr_tuple_size(void)
+static unsigned int icmpv6_nlattr_tuple_size(void)
{
- return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
}
#endif
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index a338bbc33cf3..4a7e5ffa5108 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -31,37 +31,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
return id;
}
-/* This function exists only for tap drivers that must support broken
- * clients requesting UFO without specifying an IPv6 fragment ID.
- *
- * This is similar to ipv6_select_ident() but we use an independent hash
- * seed to limit information leakage.
- *
- * The network header must be set before calling this.
- */
-void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
-{
- static u32 ip6_proxy_idents_hashrnd __read_mostly;
- struct in6_addr buf[2];
- struct in6_addr *addrs;
- u32 id;
-
- addrs = skb_header_pointer(skb,
- skb_network_offset(skb) +
- offsetof(struct ipv6hdr, saddr),
- sizeof(buf), buf);
- if (!addrs)
- return;
-
- net_get_random_once(&ip6_proxy_idents_hashrnd,
- sizeof(ip6_proxy_idents_hashrnd));
-
- id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
- &addrs[1], &addrs[0]);
- skb_shinfo(skb)->ip6_frag_id = htonl(id);
-}
-EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
-
__be32 ipv6_select_ident(struct net *net,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0e2529958b52..6bb98c93edfe 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1940,8 +1940,8 @@ struct proto tcpv6_prot = {
.memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count,
.sysctl_mem = sysctl_tcp_mem,
- .sysctl_wmem = sysctl_tcp_wmem,
- .sysctl_rmem = sysctl_tcp_rmem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 7c8d1eb757a5..350fcd39ebd8 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1246,8 +1246,6 @@ static void l2tp_tunnel_destruct(struct sock *sk)
list_del_rcu(&tunnel->list);
spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
- l2tp_tunnel_closeall(tunnel);
-
tunnel->sock = NULL;
l2tp_tunnel_dec_refcount(tunnel);
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 3e2dec1fb0f5..5c366ecfa1cb 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -41,7 +41,6 @@
/* via netdev_priv() */
struct l2tp_eth {
- struct sock *tunnel_sock;
struct l2tp_session *session;
atomic_long_t tx_bytes;
atomic_long_t tx_packets;
@@ -313,7 +312,6 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
priv = netdev_priv(dev);
priv->session = session;
- priv->tunnel_sock = tunnel->sock;
session->recv_skb = l2tp_eth_dev_recv;
session->session_close = l2tp_eth_delete;
#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 6dbe450400a2..ff61124fdf59 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -123,6 +123,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
unsigned char *ptr, *optr;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
+ struct iphdr *iph;
int length;
if (!pskb_may_pull(skb, 4))
@@ -178,24 +179,17 @@ pass_up:
goto discard;
tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel) {
- sk = tunnel->sock;
- sock_hold(sk);
- } else {
- struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
-
- read_lock_bh(&l2tp_ip_lock);
- sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr,
- inet_iif(skb), tunnel_id);
- if (!sk) {
- read_unlock_bh(&l2tp_ip_lock);
- goto discard;
- }
+ iph = (struct iphdr *)skb_network_header(skb);
- sock_hold(sk);
+ read_lock_bh(&l2tp_ip_lock);
+ sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb),
+ tunnel_id);
+ if (!sk) {
read_unlock_bh(&l2tp_ip_lock);
+ goto discard;
}
+ sock_hold(sk);
+ read_unlock_bh(&l2tp_ip_lock);
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 59ebb6e4f735..192344688c06 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -136,6 +136,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
unsigned char *ptr, *optr;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
+ struct ipv6hdr *iph;
int length;
if (!pskb_may_pull(skb, 4))
@@ -192,24 +193,17 @@ pass_up:
goto discard;
tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel) {
- sk = tunnel->sock;
- sock_hold(sk);
- } else {
- struct ipv6hdr *iph = ipv6_hdr(skb);
-
- read_lock_bh(&l2tp_ip6_lock);
- sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
- inet6_iif(skb), tunnel_id);
- if (!sk) {
- read_unlock_bh(&l2tp_ip6_lock);
- goto discard;
- }
+ iph = ipv6_hdr(skb);
- sock_hold(sk);
+ read_lock_bh(&l2tp_ip6_lock);
+ sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
+ inet6_iif(skb), tunnel_id);
+ if (!sk) {
read_unlock_bh(&l2tp_ip6_lock);
+ goto discard;
}
+ sock_hold(sk);
+ read_unlock_bh(&l2tp_ip6_lock);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 5f5c78b632d0..b412fc3351dc 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -127,8 +127,6 @@ struct pppol2tp_session {
* PPPoX socket */
struct sock *__sk; /* Copy of .sk, for cleanup */
struct rcu_head rcu; /* For asynchronous release */
- struct sock *tunnel_sock; /* Pointer to the tunnel UDP
- * socket */
int flags; /* accessed by PPPIOCGFLAGS.
* Unused. */
};
@@ -295,7 +293,6 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
int error;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
int uhlen;
error = -ENOTCONN;
@@ -308,10 +305,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
if (session == NULL)
goto error;
- ps = l2tp_session_priv(session);
- tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
- if (tunnel == NULL)
- goto error_put_sess;
+ tunnel = session->tunnel;
uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
@@ -322,7 +316,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
0, GFP_KERNEL);
if (!skb)
- goto error_put_sess_tun;
+ goto error_put_sess;
/* Reserve space for headers. */
skb_reserve(skb, NET_SKB_PAD);
@@ -340,20 +334,17 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
error = memcpy_from_msg(skb_put(skb, total_len), m, total_len);
if (error < 0) {
kfree_skb(skb);
- goto error_put_sess_tun;
+ goto error_put_sess;
}
local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
local_bh_enable();
- sock_put(ps->tunnel_sock);
sock_put(sk);
return total_len;
-error_put_sess_tun:
- sock_put(ps->tunnel_sock);
error_put_sess:
sock_put(sk);
error:
@@ -377,10 +368,8 @@ error:
static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
{
struct sock *sk = (struct sock *) chan->private;
- struct sock *sk_tun;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
int uhlen, headroom;
if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
@@ -391,13 +380,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
if (session == NULL)
goto abort;
- ps = l2tp_session_priv(session);
- sk_tun = ps->tunnel_sock;
- if (sk_tun == NULL)
- goto abort_put_sess;
- tunnel = l2tp_sock_to_tunnel(sk_tun);
- if (tunnel == NULL)
- goto abort_put_sess;
+ tunnel = session->tunnel;
uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
headroom = NET_SKB_PAD +
@@ -406,7 +389,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
session->hdr_len + /* L2TP header */
2; /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
if (skb_cow_head(skb, headroom))
- goto abort_put_sess_tun;
+ goto abort_put_sess;
/* Setup PPP header */
__skb_push(skb, 2);
@@ -417,12 +400,10 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
l2tp_xmit_skb(session, skb, session->hdr_len);
local_bh_enable();
- sock_put(sk_tun);
sock_put(sk);
+
return 1;
-abort_put_sess_tun:
- sock_put(sk_tun);
abort_put_sess:
sock_put(sk);
abort:
@@ -609,7 +590,6 @@ static void pppol2tp_session_init(struct l2tp_session *session)
ps = l2tp_session_priv(session);
mutex_init(&ps->sk_lock);
- ps->tunnel_sock = session->tunnel->sock;
ps->owner = current->pid;
/* If PMTU discovery was enabled, use the MTU that was discovered */
@@ -760,13 +740,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
error = -EEXIST;
goto end;
}
-
- /* consistency checks */
- if (ps->tunnel_sock != tunnel->sock) {
- mutex_unlock(&ps->sk_lock);
- error = -EEXIST;
- goto end;
- }
} else {
/* Default MTU must allow space for UDP/L2TP/PPP headers */
cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
@@ -919,9 +892,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
goto end;
pls = l2tp_session_priv(session);
- tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock);
- if (tunnel == NULL)
- goto end_put_sess;
+ tunnel = session->tunnel;
inet = inet_sk(tunnel->sock);
if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) {
@@ -1001,8 +972,6 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
*usockaddr_len = len;
error = 0;
- sock_put(pls->tunnel_sock);
-end_put_sess:
sock_put(sk);
end:
return error;
@@ -1241,7 +1210,6 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
struct sock *sk = sock->sk;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
int err;
if (!sk)
@@ -1265,16 +1233,10 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
/* Special case: if session's session_id is zero, treat ioctl as a
* tunnel ioctl
*/
- ps = l2tp_session_priv(session);
if ((session->session_id == 0) &&
(session->peer_session_id == 0)) {
- err = -EBADF;
- tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
- if (tunnel == NULL)
- goto end_put_sess;
-
+ tunnel = session->tunnel;
err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
- sock_put(ps->tunnel_sock);
goto end_put_sess;
}
@@ -1400,7 +1362,6 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
struct sock *sk = sock->sk;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
int val;
int err;
@@ -1425,20 +1386,14 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
/* Special case: if session_id == 0x0000, treat as operation on tunnel
*/
- ps = l2tp_session_priv(session);
if ((session->session_id == 0) &&
(session->peer_session_id == 0)) {
- err = -EBADF;
- tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
- if (tunnel == NULL)
- goto end_put_sess;
-
+ tunnel = session->tunnel;
err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
- sock_put(ps->tunnel_sock);
- } else
+ } else {
err = pppol2tp_session_setsockopt(sk, session, optname, val);
+ }
-end_put_sess:
sock_put(sk);
end:
return err;
@@ -1526,7 +1481,6 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
struct l2tp_tunnel *tunnel;
int val, len;
int err;
- struct pppol2tp_session *ps;
if (level != SOL_PPPOL2TP)
return -EINVAL;
@@ -1550,16 +1504,10 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
goto end;
/* Special case: if session_id == 0x0000, treat as operation on tunnel */
- ps = l2tp_session_priv(session);
if ((session->session_id == 0) &&
(session->peer_session_id == 0)) {
- err = -EBADF;
- tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
- if (tunnel == NULL)
- goto end_put_sess;
-
+ tunnel = session->tunnel;
err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
- sock_put(ps->tunnel_sock);
if (err)
goto end_put_sess;
} else {
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index f135938bf781..67e708e98ccf 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -73,6 +73,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
ncm->data[2] = data;
ncm->data[4] = ntohl(lsc->oem_status);
+ netdev_info(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n",
+ nc->id, data & 0x1 ? "up" : "down");
+
chained = !list_empty(&nc->link);
state = nc->state;
spin_unlock_irqrestore(&nc->lock, flags);
@@ -145,6 +148,8 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
ncm = &nc->modes[NCSI_MODE_LINK];
hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
ncm->data[3] = ntohl(hncdsc->status);
+ netdev_info(ndp->ndev.dev, "NCSI: HNCDSC AEN - channel %u state %s\n",
+ nc->id, ncm->data[3] & 0x3 ? "up" : "down");
if (!list_empty(&nc->link) ||
nc->state != NCSI_CHANNEL_ACTIVE) {
spin_unlock_irqrestore(&nc->lock, flags);
@@ -212,10 +217,18 @@ int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
}
ret = ncsi_validate_aen_pkt(h, nah->payload);
- if (ret)
+ if (ret) {
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: 'bad' packet ignored for AEN type 0x%x\n",
+ h->type);
goto out;
+ }
ret = nah->handler(ndp, h);
+ if (ret)
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Handler for AEN type 0x%x returned %d\n",
+ h->type, ret);
out:
consume_skb(skb);
return ret;
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 47baf914eec2..a2b904a718c6 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -229,6 +229,8 @@ static void ncsi_channel_monitor(unsigned long data)
case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX:
break;
default:
+ netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n",
+ nc->id);
if (!(ndp->flags & NCSI_DEV_HWA)) {
ncsi_report_link(ndp, true);
ndp->flags |= NCSI_DEV_RESHUFFLE;
@@ -682,7 +684,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index);
if (!data) {
netdev_err(ndp->ndev.dev,
- "ncsi: failed to retrieve filter %d\n", index);
+ "NCSI: failed to retrieve filter %d\n", index);
/* Set the VLAN id to 0 - this will still disable the entry in
* the filter table, but we won't know what it was.
*/
@@ -692,7 +694,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
}
netdev_printk(KERN_DEBUG, ndp->ndev.dev,
- "ncsi: removed vlan tag %u at index %d\n",
+ "NCSI: removed vlan tag %u at index %d\n",
vid, index + 1);
ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index);
@@ -718,7 +720,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
if (index < 0) {
/* New tag to add */
netdev_printk(KERN_DEBUG, ndp->ndev.dev,
- "ncsi: new vlan id to set: %u\n",
+ "NCSI: new vlan id to set: %u\n",
vlan->vid);
break;
}
@@ -745,7 +747,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
}
netdev_printk(KERN_DEBUG, ndp->ndev.dev,
- "ncsi: set vid %u in packet, index %u\n",
+ "NCSI: set vid %u in packet, index %u\n",
vlan->vid, index + 1);
nca->type = NCSI_PKT_CMD_SVF;
nca->words[1] = vlan->vid;
@@ -784,8 +786,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
nca.package = np->id;
nca.channel = NCSI_RESERVED_CHANNEL;
ret = ncsi_xmit_cmd(&nca);
- if (ret)
+ if (ret) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Failed to transmit CMD_SP\n");
goto error;
+ }
nd->state = ncsi_dev_state_config_cis;
break;
@@ -797,8 +802,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
nca.package = np->id;
nca.channel = nc->id;
ret = ncsi_xmit_cmd(&nca);
- if (ret)
+ if (ret) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Failed to transmit CMD_CIS\n");
goto error;
+ }
nd->state = ncsi_dev_state_config_clear_vids;
break;
@@ -895,10 +903,16 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
}
ret = ncsi_xmit_cmd(&nca);
- if (ret)
+ if (ret) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Failed to transmit CMD %x\n",
+ nca.type);
goto error;
+ }
break;
case ncsi_dev_state_config_done:
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: channel %u config done\n", nc->id);
spin_lock_irqsave(&nc->lock, flags);
if (nc->reconfigure_needed) {
/* This channel's configuration has been updated
@@ -925,6 +939,9 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
} else {
hot_nc = NULL;
nc->state = NCSI_CHANNEL_INACTIVE;
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: channel %u link down after config\n",
+ nc->id);
}
spin_unlock_irqrestore(&nc->lock, flags);
@@ -937,8 +954,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
ncsi_process_next_channel(ndp);
break;
default:
- netdev_warn(dev, "Wrong NCSI state 0x%x in config\n",
- nd->state);
+ netdev_alert(dev, "Wrong NCSI state 0x%x in config\n",
+ nd->state);
}
return;
@@ -990,10 +1007,17 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
}
if (!found) {
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: No channel found with link\n");
ncsi_report_link(ndp, true);
return -ENODEV;
}
+ ncm = &found->modes[NCSI_MODE_LINK];
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: Channel %u added to queue (link %s)\n",
+ found->id, ncm->data[2] & 0x1 ? "up" : "down");
+
out:
spin_lock_irqsave(&ndp->lock, flags);
list_add_tail_rcu(&found->link, &ndp->channel_queue);
@@ -1055,6 +1079,8 @@ static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
/* We can have no channels in extremely case */
if (list_empty(&ndp->channel_queue)) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: No available channels for HWA\n");
ncsi_report_link(ndp, false);
return -ENOENT;
}
@@ -1223,6 +1249,9 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
return;
error:
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Failed to transmit cmd 0x%x during probe\n",
+ nca.type);
ncsi_report_link(ndp, true);
}
@@ -1276,10 +1305,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp)
switch (old_state) {
case NCSI_CHANNEL_INACTIVE:
ndp->ndev.state = ncsi_dev_state_config;
+ netdev_info(ndp->ndev.dev, "NCSI: configuring channel %u\n",
+ nc->id);
ncsi_configure_channel(ndp);
break;
case NCSI_CHANNEL_ACTIVE:
ndp->ndev.state = ncsi_dev_state_suspend;
+ netdev_info(ndp->ndev.dev, "NCSI: suspending channel %u\n",
+ nc->id);
ncsi_suspend_channel(ndp);
break;
default:
@@ -1299,6 +1332,8 @@ out:
return ncsi_choose_active_channel(ndp);
}
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: No more channels to process\n");
ncsi_report_link(ndp, false);
return -ENODEV;
}
@@ -1390,7 +1425,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
ncsi_dev_state_config ||
!list_empty(&nc->link)) {
netdev_printk(KERN_DEBUG, nd->dev,
- "ncsi: channel %p marked dirty\n",
+ "NCSI: channel %p marked dirty\n",
nc);
nc->reconfigure_needed = true;
}
@@ -1410,7 +1445,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
spin_unlock_irqrestore(&ndp->lock, flags);
netdev_printk(KERN_DEBUG, nd->dev,
- "ncsi: kicked channel %p\n", nc);
+ "NCSI: kicked channel %p\n", nc);
n++;
}
}
@@ -1431,7 +1466,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
nd = ncsi_find_dev(dev);
if (!nd) {
- netdev_warn(dev, "ncsi: No net_device?\n");
+ netdev_warn(dev, "NCSI: No net_device?\n");
return 0;
}
@@ -1442,7 +1477,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
n_vids++;
if (vlan->vid == vid) {
netdev_printk(KERN_DEBUG, dev,
- "vid %u already registered\n", vid);
+ "NCSI: vid %u already registered\n", vid);
return 0;
}
}
@@ -1461,7 +1496,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
vlan->vid = vid;
list_add_rcu(&vlan->list, &ndp->vlan_vids);
- netdev_printk(KERN_DEBUG, dev, "Added new vid %u\n", vid);
+ netdev_printk(KERN_DEBUG, dev, "NCSI: Added new vid %u\n", vid);
found = ncsi_kick_channels(ndp) != 0;
@@ -1481,7 +1516,7 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
nd = ncsi_find_dev(dev);
if (!nd) {
- netdev_warn(dev, "ncsi: no net_device?\n");
+ netdev_warn(dev, "NCSI: no net_device?\n");
return 0;
}
@@ -1491,14 +1526,14 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list)
if (vlan->vid == vid) {
netdev_printk(KERN_DEBUG, dev,
- "vid %u found, removing\n", vid);
+ "NCSI: vid %u found, removing\n", vid);
list_del_rcu(&vlan->list);
found = true;
kfree(vlan);
}
if (!found) {
- netdev_err(dev, "ncsi: vid %u wasn't registered!\n", vid);
+ netdev_err(dev, "NCSI: vid %u wasn't registered!\n", vid);
return -EINVAL;
}
@@ -1581,10 +1616,12 @@ int ncsi_start_dev(struct ncsi_dev *nd)
return 0;
}
- if (ndp->flags & NCSI_DEV_HWA)
+ if (ndp->flags & NCSI_DEV_HWA) {
+ netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n");
ret = ncsi_enable_hwa(ndp);
- else
+ } else {
ret = ncsi_choose_active_channel(ndp);
+ }
return ret;
}
@@ -1615,6 +1652,7 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
}
}
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev, "NCSI: Stopping device\n");
ncsi_report_link(ndp, true);
}
EXPORT_SYMBOL_GPL(ncsi_stop_dev);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 927dad4759d1..efd933ff5570 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -146,7 +146,7 @@ static int ncsi_rsp_handler_ec(struct ncsi_request *nr)
ncm = &nc->modes[NCSI_MODE_ENABLE];
if (ncm->enable)
- return -EBUSY;
+ return 0;
ncm->enable = 1;
return 0;
@@ -173,7 +173,7 @@ static int ncsi_rsp_handler_dc(struct ncsi_request *nr)
ncm = &nc->modes[NCSI_MODE_ENABLE];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
ncm->enable = 0;
return 0;
@@ -217,7 +217,7 @@ static int ncsi_rsp_handler_ecnt(struct ncsi_request *nr)
ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
if (ncm->enable)
- return -EBUSY;
+ return 0;
ncm->enable = 1;
return 0;
@@ -239,7 +239,7 @@ static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr)
ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
ncm->enable = 1;
return 0;
@@ -263,7 +263,7 @@ static int ncsi_rsp_handler_ae(struct ncsi_request *nr)
/* Check if the AEN has been enabled */
ncm = &nc->modes[NCSI_MODE_AEN];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to AEN configuration */
cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd);
@@ -382,7 +382,7 @@ static int ncsi_rsp_handler_ev(struct ncsi_request *nr)
/* Check if VLAN mode has been enabled */
ncm = &nc->modes[NCSI_MODE_VLAN];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to VLAN mode */
cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd);
@@ -409,7 +409,7 @@ static int ncsi_rsp_handler_dv(struct ncsi_request *nr)
/* Check if VLAN mode has been enabled */
ncm = &nc->modes[NCSI_MODE_VLAN];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to VLAN mode */
ncm->enable = 0;
@@ -455,13 +455,10 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
bitmap = &ncf->bitmap;
if (cmd->at_e & 0x1) {
- if (test_and_set_bit(cmd->index, bitmap))
- return -EBUSY;
+ set_bit(cmd->index, bitmap);
memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6);
} else {
- if (!test_and_clear_bit(cmd->index, bitmap))
- return -EBUSY;
-
+ clear_bit(cmd->index, bitmap);
memset(ncf->data + 6 * cmd->index, 0, 6);
}
@@ -485,7 +482,7 @@ static int ncsi_rsp_handler_ebf(struct ncsi_request *nr)
/* Check if broadcast filter has been enabled */
ncm = &nc->modes[NCSI_MODE_BC];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to broadcast filter mode */
cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd);
@@ -511,7 +508,7 @@ static int ncsi_rsp_handler_dbf(struct ncsi_request *nr)
/* Check if broadcast filter isn't enabled */
ncm = &nc->modes[NCSI_MODE_BC];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to broadcast filter mode */
ncm->enable = 0;
@@ -538,7 +535,7 @@ static int ncsi_rsp_handler_egmf(struct ncsi_request *nr)
/* Check if multicast filter has been enabled */
ncm = &nc->modes[NCSI_MODE_MC];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to multicast filter mode */
cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd);
@@ -564,7 +561,7 @@ static int ncsi_rsp_handler_dgmf(struct ncsi_request *nr)
/* Check if multicast filter has been enabled */
ncm = &nc->modes[NCSI_MODE_MC];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to multicast filter mode */
ncm->enable = 0;
@@ -591,7 +588,7 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
/* Check if flow control has been enabled */
ncm = &nc->modes[NCSI_MODE_FC];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to flow control mode */
cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd);
@@ -1032,11 +1029,19 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
if (payload < 0)
payload = ntohs(hdr->length);
ret = ncsi_validate_rsp_pkt(nr, payload);
- if (ret)
+ if (ret) {
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: 'bad' packet ignored for type 0x%x\n",
+ hdr->type);
goto out;
+ }
/* Process the packet */
ret = nrh->handler(nr);
+ if (ret)
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Handler for packet type 0x%x returned %d\n",
+ hdr->type, ret);
out:
ncsi_free_request(nr);
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index a2f19b9906e9..0f164e986bf1 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -434,7 +434,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
if (unlikely(tb[IPSET_ATTR_CIDR])) {
- u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
if (cidr != HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index c9b4e05ad940..e864681b8dc5 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -454,7 +454,6 @@ static size_t
list_set_memsize(const struct list_set *map, size_t dsize)
{
struct set_elem *e;
- size_t memsize;
u32 n = 0;
rcu_read_lock();
@@ -462,9 +461,7 @@ list_set_memsize(const struct list_set *map, size_t dsize)
n++;
rcu_read_unlock();
- memsize = sizeof(*map) + n * dsize;
-
- return memsize;
+ return (sizeof(*map) + n * dsize);
}
static int
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index 1c8a42c1056c..d5be9c25fad6 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -3,6 +3,141 @@
/* Prefixlen maps for fast conversions, by Jan Engelhardt. */
+#ifdef E
+#undef E
+#endif
+
+#define PREFIXES_MAP \
+ E(0x00000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0x80000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+
#define E(a, b, c, d) \
{.ip6 = { \
htonl(a), htonl(b), \
@@ -13,135 +148,7 @@
* just use prefixlen_netmask_map[prefixlength].ip.
*/
const union nf_inet_addr ip_set_netmask_map[] = {
- E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
- E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+ PREFIXES_MAP
};
EXPORT_SYMBOL_GPL(ip_set_netmask_map);
@@ -155,135 +162,7 @@ EXPORT_SYMBOL_GPL(ip_set_netmask_map);
* just use prefixlen_hostmask_map[prefixlength].ip.
*/
const union nf_inet_addr ip_set_hostmask_map[] = {
- E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
- E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+ PREFIXES_MAP
};
EXPORT_SYMBOL_GPL(ip_set_hostmask_map);
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 3d2ac71a83ec..f73561ca982d 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -185,7 +185,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
ret = 1;
} else {
- pr_err("%s(): request for already hashed, called from %pF\n",
+ pr_err("%s(): request for already hashed, called from %pS\n",
__func__, __builtin_return_address(0));
ret = 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 4f940d7eb2f7..fac8c802b4ea 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -300,7 +300,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
unsigned int hash;
if (svc->flags & IP_VS_SVC_F_HASHED) {
- pr_err("%s(): request for already hashed, called from %pF\n",
+ pr_err("%s(): request for already hashed, called from %pS\n",
__func__, __builtin_return_address(0));
return 0;
}
@@ -334,7 +334,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
{
if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
- pr_err("%s(): request for unhash flagged, called from %pF\n",
+ pr_err("%s(): request for unhash flagged, called from %pS\n",
__func__, __builtin_return_address(0));
return 0;
}
@@ -2034,12 +2034,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
seq_puts(seq,
" -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
} else {
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
const struct ip_vs_service *svc = v;
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
char *sched_name = sched ? sched->name : "none";
+ if (svc->ipvs != ipvs)
+ return 0;
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 01130392b7c0..5749fcaa2770 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1083,7 +1083,7 @@ static void gc_worker(struct work_struct *work)
next_run = gc_work->next_gc_run;
gc_work->last_bucket = i;
gc_work->early_drop = false;
- queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
+ queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
@@ -1419,7 +1419,7 @@ repeat:
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
- ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, timeouts);
+ ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
@@ -1563,9 +1563,14 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
-int nf_ct_port_nlattr_tuple_size(void)
+unsigned int nf_ct_port_nlattr_tuple_size(void)
{
- return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
#endif
@@ -2084,7 +2089,7 @@ int nf_conntrack_init_start(void)
goto err_proto;
conntrack_gc_work_init(&conntrack_gc_work);
- queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
+ queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
return 0;
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 89b2e46925c4..cf1bf2605c10 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -91,41 +91,41 @@ typedef struct field_t {
} field_t;
/* Bit Stream */
-typedef struct {
+struct bitstr {
unsigned char *buf;
unsigned char *beg;
unsigned char *end;
unsigned char *cur;
unsigned int bit;
-} bitstr_t;
+};
/* Tool Functions */
#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
-static unsigned int get_len(bitstr_t *bs);
-static unsigned int get_bit(bitstr_t *bs);
-static unsigned int get_bits(bitstr_t *bs, unsigned int b);
-static unsigned int get_bitmap(bitstr_t *bs, unsigned int b);
-static unsigned int get_uint(bitstr_t *bs, int b);
+static unsigned int get_len(struct bitstr *bs);
+static unsigned int get_bit(struct bitstr *bs);
+static unsigned int get_bits(struct bitstr *bs, unsigned int b);
+static unsigned int get_bitmap(struct bitstr *bs, unsigned int b);
+static unsigned int get_uint(struct bitstr *bs, int b);
/* Decoder Functions */
-static int decode_nul(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_bool(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_oid(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_int(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_enum(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_bitstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_numstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_octstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_seq(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_seqof(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_choice(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level);
/* Decoder Functions Vector */
-typedef int (*decoder_t)(bitstr_t *, const struct field_t *, char *, int);
+typedef int (*decoder_t)(struct bitstr *, const struct field_t *, char *, int);
static const decoder_t Decoders[] = {
decode_nul,
decode_bool,
@@ -150,7 +150,7 @@ static const decoder_t Decoders[] = {
* Functions
****************************************************************************/
/* Assume bs is aligned && v < 16384 */
-static unsigned int get_len(bitstr_t *bs)
+static unsigned int get_len(struct bitstr *bs)
{
unsigned int v;
@@ -166,7 +166,7 @@ static unsigned int get_len(bitstr_t *bs)
}
/****************************************************************************/
-static unsigned int get_bit(bitstr_t *bs)
+static unsigned int get_bit(struct bitstr *bs)
{
unsigned int b = (*bs->cur) & (0x80 >> bs->bit);
@@ -177,7 +177,7 @@ static unsigned int get_bit(bitstr_t *bs)
/****************************************************************************/
/* Assume b <= 8 */
-static unsigned int get_bits(bitstr_t *bs, unsigned int b)
+static unsigned int get_bits(struct bitstr *bs, unsigned int b)
{
unsigned int v, l;
@@ -203,7 +203,7 @@ static unsigned int get_bits(bitstr_t *bs, unsigned int b)
/****************************************************************************/
/* Assume b <= 32 */
-static unsigned int get_bitmap(bitstr_t *bs, unsigned int b)
+static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
{
unsigned int v, l, shift, bytes;
@@ -242,7 +242,7 @@ static unsigned int get_bitmap(bitstr_t *bs, unsigned int b)
/****************************************************************************
* Assume bs is aligned and sizeof(unsigned int) == 4
****************************************************************************/
-static unsigned int get_uint(bitstr_t *bs, int b)
+static unsigned int get_uint(struct bitstr *bs, int b)
{
unsigned int v = 0;
@@ -264,7 +264,7 @@ static unsigned int get_uint(bitstr_t *bs, int b)
}
/****************************************************************************/
-static int decode_nul(bitstr_t *bs, const struct field_t *f,
+static int decode_nul(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -273,7 +273,7 @@ static int decode_nul(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_bool(bitstr_t *bs, const struct field_t *f,
+static int decode_bool(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -285,7 +285,7 @@ static int decode_bool(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_oid(bitstr_t *bs, const struct field_t *f,
+static int decode_oid(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
int len;
@@ -302,7 +302,7 @@ static int decode_oid(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_int(bitstr_t *bs, const struct field_t *f,
+static int decode_int(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -346,7 +346,7 @@ static int decode_int(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_enum(bitstr_t *bs, const struct field_t *f,
+static int decode_enum(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -362,7 +362,7 @@ static int decode_enum(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_bitstr(bitstr_t *bs, const struct field_t *f,
+static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -396,7 +396,7 @@ static int decode_bitstr(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_numstr(bitstr_t *bs, const struct field_t *f,
+static int decode_numstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -414,7 +414,7 @@ static int decode_numstr(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_octstr(bitstr_t *bs, const struct field_t *f,
+static int decode_octstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -463,7 +463,7 @@ static int decode_octstr(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_bmpstr(bitstr_t *bs, const struct field_t *f,
+static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -489,7 +489,7 @@ static int decode_bmpstr(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_seq(bitstr_t *bs, const struct field_t *f,
+static int decode_seq(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len;
@@ -606,7 +606,7 @@ static int decode_seq(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_seqof(bitstr_t *bs, const struct field_t *f,
+static int decode_seqof(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int count, effective_count = 0, i, len = 0;
@@ -696,7 +696,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f,
/****************************************************************************/
-static int decode_choice(bitstr_t *bs, const struct field_t *f,
+static int decode_choice(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int type, ext, len = 0;
@@ -772,7 +772,7 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT,
0, _RasMessage
};
- bitstr_t bs;
+ struct bitstr bs;
bs.buf = bs.beg = bs.cur = buf;
bs.end = buf + sz;
@@ -789,7 +789,7 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT,
0, _H323_UserInformation
};
- bitstr_t bs;
+ struct bitstr bs;
bs.buf = buf;
bs.beg = bs.cur = beg;
@@ -808,7 +808,7 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4,
DECODE | EXT, 0, _MultimediaSystemControlMessage
};
- bitstr_t bs;
+ struct bitstr bs;
bs.buf = bs.beg = bs.cur = buf;
bs.end = buf + sz;
@@ -877,6 +877,7 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
if (sz < 1)
break;
len = *p++;
+ sz--;
if (sz < len)
break;
p += len;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index de4053d84364..6e0adfefb9ed 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -533,11 +533,11 @@ nla_put_failure:
return -1;
}
-static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
+static size_t ctnetlink_proto_size(const struct nf_conn *ct)
{
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
- size_t len;
+ size_t len, len4 = 0;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
len = l3proto->nla_size;
@@ -545,8 +545,12 @@ static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
len += l4proto->nla_size;
+ if (l4proto->nlattr_tuple_size) {
+ len4 = l4proto->nlattr_tuple_size();
+ len4 *= 3u; /* ORIG, REPLY, MASTER */
+ }
- return len;
+ return len + len4;
}
static inline size_t ctnetlink_acct_size(const struct nf_conn *ct)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index b3e489c859ec..c8e9c9503a08 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -27,6 +27,7 @@
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_log.h>
static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
@@ -63,6 +64,52 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
*header = NULL;
*table = NULL;
}
+
+__printf(5, 6)
+void nf_l4proto_log_invalid(const struct sk_buff *skb,
+ struct net *net,
+ u16 pf, u8 protonum,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ if (net->ct.sysctl_log_invalid != protonum ||
+ net->ct.sysctl_log_invalid != IPPROTO_RAW)
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
+ "nf_ct_proto_%d: %pV ", protonum, &vaf);
+ va_end(args);
+}
+EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
+
+__printf(3, 4)
+void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
+ const struct nf_conn *ct,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ struct net *net;
+ va_list args;
+
+ net = nf_ct_net(ct);
+ if (likely(net->ct.sysctl_log_invalid == 0))
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct),
+ nf_ct_protonum(ct), "%pV", &vaf);
+ va_end(args);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
#endif
const struct nf_conntrack_l4proto *
@@ -125,7 +172,7 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
-int nf_ct_netns_get(struct net *net, u8 nfproto)
+static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
{
const struct nf_conntrack_l3proto *l3proto;
int ret;
@@ -150,9 +197,33 @@ int nf_ct_netns_get(struct net *net, u8 nfproto)
return ret;
}
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+ int err;
+
+ if (nfproto == NFPROTO_INET) {
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+ if (err < 0)
+ goto err1;
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+ if (err < 0)
+ goto err2;
+ } else {
+ err = nf_ct_netns_do_get(net, nfproto);
+ if (err < 0)
+ goto err1;
+ }
+ return 0;
+
+err2:
+ nf_ct_netns_put(net, NFPROTO_IPV4);
+err1:
+ return err;
+}
EXPORT_SYMBOL_GPL(nf_ct_netns_get);
-void nf_ct_netns_put(struct net *net, u8 nfproto)
+static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
{
const struct nf_conntrack_l3proto *l3proto;
@@ -171,6 +242,15 @@ void nf_ct_netns_put(struct net *net, u8 nfproto)
nf_ct_l3proto_module_put(nfproto);
}
+
+void nf_ct_netns_put(struct net *net, uint8_t nfproto)
+{
+ if (nfproto == NFPROTO_INET) {
+ nf_ct_netns_do_put(net, NFPROTO_IPV4);
+ nf_ct_netns_do_put(net, NFPROTO_IPV6);
+ } else
+ nf_ct_netns_do_put(net, nfproto);
+}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
const struct nf_conntrack_l4proto *
@@ -351,8 +431,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
l4proto->nla_size = 0;
if (l4proto->nlattr_size)
l4proto->nla_size += l4proto->nlattr_size();
- if (l4proto->nlattr_tuple_size)
- l4proto->nla_size += 3 * l4proto->nlattr_tuple_size();
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
l4proto);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 0f5a4d79f6b8..2a446f4a554c 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -428,13 +428,13 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
default:
dn = dccp_pernet(net);
if (dn->dccp_loose == 0) {
- msg = "nf_ct_dccp: not picking up existing connection ";
+ msg = "not picking up existing connection ";
goto out_invalid;
}
case CT_DCCP_REQUEST:
break;
case CT_DCCP_INVALID:
- msg = "nf_ct_dccp: invalid state transition ";
+ msg = "invalid state transition ";
goto out_invalid;
}
@@ -447,9 +447,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
out_invalid:
- if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL,
- NULL, "%s", msg);
+ nf_ct_l4proto_log_invalid(skb, ct, "%s", msg);
return false;
}
@@ -469,10 +467,8 @@ static unsigned int *dccp_get_timeouts(struct net *net)
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeouts)
{
- struct net *net = nf_ct_net(ct);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
struct dccp_hdr _dh, *dh;
u_int8_t type, old_state, new_state;
@@ -534,15 +530,11 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.last_pkt = type;
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_dccp: invalid packet ignored ");
+ nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet");
return NF_ACCEPT;
case CT_DCCP_INVALID:
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_dccp: invalid state transition ");
+ nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition");
return -NF_ACCEPT;
}
@@ -604,8 +596,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
return NF_ACCEPT;
out_invalid:
- if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", msg);
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg);
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 9cd40700842e..1f86ddf6649a 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -60,7 +60,6 @@ static int generic_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeout)
{
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 09a90484c27d..a2503005d80b 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -244,7 +244,6 @@ static int gre_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeouts)
{
/* If we've seen traffic both ways, this is a GRE connection.
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6303a88af12b..80faf04ddf15 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -306,7 +306,6 @@ static int sctp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeouts)
{
enum sctp_conntrack new_state, old_state;
@@ -522,8 +521,7 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
}
return NF_ACCEPT;
out_invalid:
- if (LOG_INVALID(net, IPPROTO_SCTP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", logmsg);
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg);
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index cba1c6ffe51a..b12fc07111d0 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -493,8 +493,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
unsigned int index,
const struct sk_buff *skb,
unsigned int dataoff,
- const struct tcphdr *tcph,
- u_int8_t pf)
+ const struct tcphdr *tcph)
{
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = tcp_pernet(net);
@@ -702,9 +701,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
tn->tcp_be_liberal)
res = true;
- if (!res && LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: %s ",
+ if (!res) {
+ nf_ct_l4proto_log_invalid(skb, ct,
+ "%s",
before(seq, sender->td_maxend + 1) ?
in_recv_win ?
before(sack, receiver->td_end + 1) ?
@@ -713,6 +712,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
: "ACK is over the upper bound (ACKed data not seen yet)"
: "SEQ is under the lower bound (already ACKed data retransmitted)"
: "SEQ is over the upper bound (over the window of the receiver)");
+ }
}
pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
@@ -738,6 +738,12 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
[TCPHDR_ACK|TCPHDR_URG] = 1,
};
+static void tcp_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
+}
+
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
static int tcp_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
@@ -753,17 +759,13 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
/* Smaller that minimal TCP header? */
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
if (th == NULL) {
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: short packet ");
+ tcp_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
/* Not whole TCP header or malformed packet */
if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: truncated/malformed packet ");
+ tcp_error_log(skb, net, pf, "truncated packet");
return -NF_ACCEPT;
}
@@ -774,18 +776,14 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
/* FIXME: Source route IP option packets --RR */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: bad TCP checksum ");
+ tcp_error_log(skb, net, pf, "bad checksum");
return -NF_ACCEPT;
}
/* Check TCP flags. */
tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
if (!tcp_valid_flags[tcpflags]) {
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: invalid TCP flag combination ");
+ tcp_error_log(skb, net, pf, "invalid tcp flag combination");
return -NF_ACCEPT;
}
@@ -802,7 +800,6 @@ static int tcp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeouts)
{
struct net *net = nf_ct_net(ct);
@@ -939,10 +936,8 @@ static int tcp_packet(struct nf_conn *ct,
IP_CT_EXP_CHALLENGE_ACK;
}
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: invalid packet ignored in "
- "state %s ", tcp_conntrack_names[old_state]);
+ nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
+ "state %s ", tcp_conntrack_names[old_state]);
return NF_ACCEPT;
case TCP_CONNTRACK_MAX:
/* Special case for SYN proxy: when the SYN to the server or
@@ -964,9 +959,7 @@ static int tcp_packet(struct nf_conn *ct,
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: invalid state ");
+ nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
return -NF_ACCEPT;
case TCP_CONNTRACK_TIME_WAIT:
/* RFC5961 compliance cause stack to send "challenge-ACK"
@@ -981,9 +974,7 @@ static int tcp_packet(struct nf_conn *ct,
/* Detected RFC5961 challenge ACK */
ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: challenge-ACK ignored ");
+ nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
return NF_ACCEPT; /* Don't change state */
}
break;
@@ -993,9 +984,7 @@ static int tcp_packet(struct nf_conn *ct,
&& before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
/* Invalid RST */
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL,
- NULL, "nf_ct_tcp: invalid RST ");
+ nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
return -NF_ACCEPT;
}
if (index == TCP_RST_SET
@@ -1022,7 +1011,7 @@ static int tcp_packet(struct nf_conn *ct,
}
if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
- skb, dataoff, th, pf)) {
+ skb, dataoff, th)) {
spin_unlock_bh(&ct->lock);
return -NF_ACCEPT;
}
@@ -1288,9 +1277,14 @@ static int tcp_nlattr_size(void)
+ nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
}
-static int tcp_nlattr_tuple_size(void)
+static unsigned int tcp_nlattr_tuple_size(void)
{
- return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
}
#endif
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 8af734cd1a94..3a5f727103af 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -73,7 +73,6 @@ static int udp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
unsigned int *timeouts)
{
/* If we've seen traffic both ways, this is some kind of UDP
@@ -99,6 +98,12 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
}
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+static void udplite_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg);
+}
+
static int udplite_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
@@ -112,9 +117,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (!hdr) {
- if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udplite: short packet ");
+ udplite_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
@@ -122,17 +125,13 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
if (cscov == 0) {
cscov = udplen;
} else if (cscov < sizeof(*hdr) || cscov > udplen) {
- if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udplite: invalid checksum coverage ");
+ udplite_error_log(skb, net, pf, "invalid checksum coverage");
return -NF_ACCEPT;
}
/* UDPLITE mandates checksums */
if (!hdr->check) {
- if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udplite: checksum missing ");
+ udplite_error_log(skb, net, pf, "checksum missing");
return -NF_ACCEPT;
}
@@ -140,9 +139,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
pf)) {
- if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udplite: bad UDPLite checksum ");
+ udplite_error_log(skb, net, pf, "bad checksum");
return -NF_ACCEPT;
}
@@ -150,6 +147,12 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
}
#endif
+static void udp_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg);
+}
+
static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
unsigned int dataoff,
u_int8_t pf,
@@ -162,17 +165,13 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
- if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udp: short packet ");
+ udp_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
/* Truncated/malformed packets */
if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
- if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udp: truncated/malformed packet ");
+ udp_error_log(skb, net, pf, "truncated/malformed packet");
return -NF_ACCEPT;
}
@@ -186,9 +185,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
* FIXME: Source route IP option packets --RR */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
- if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udp: bad UDP checksum ");
+ udp_error_log(skb, net, pf, "bad checksum");
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index af8345fc4fbd..6c38421e31f9 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -542,17 +542,14 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
if (nf_nat_proto_remove(ct, data))
return 1;
- if ((ct->status & IPS_SRC_NAT_DONE) == 0)
- return 0;
-
- /* This netns is being destroyed, and conntrack has nat null binding.
+ /* This module is being removed and conntrack has nat null binding.
* Remove it from bysource hash, as the table will be freed soon.
*
* Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
* will delete entry from already-freed table.
*/
- clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
- __nf_nat_cleanup_conntrack(ct);
+ if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status))
+ __nf_nat_cleanup_conntrack(ct);
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 64e1ee091225..d8327b43e4dc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2549,14 +2549,9 @@ nft_select_set_ops(const struct nft_ctx *ctx,
case NFT_SET_POL_PERFORMANCE:
if (est.lookup < best.lookup)
break;
- if (est.lookup == best.lookup) {
- if (!desc->size) {
- if (est.space < best.space)
- break;
- } else if (est.size < best.size) {
- break;
- }
- }
+ if (est.lookup == best.lookup &&
+ est.space < best.space)
+ break;
continue;
case NFT_SET_POL_MEMORY:
if (!desc->size) {
@@ -3593,45 +3588,6 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb)
return 0;
}
-static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
-{
- u8 genmask = nft_genmask_cur(net);
- const struct nft_set *set;
- struct nft_ctx ctx;
- int err;
-
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
- if (err < 0)
- return err;
-
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
- genmask);
- if (IS_ERR(set))
- return PTR_ERR(set);
-
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
- struct netlink_dump_control c = {
- .dump = nf_tables_dump_set,
- .done = nf_tables_dump_set_done,
- };
- struct nft_set_dump_ctx *dump_ctx;
-
- dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
- if (!dump_ctx)
- return -ENOMEM;
-
- dump_ctx->set = set;
- dump_ctx->ctx = ctx;
-
- c.data = dump_ctx;
- return netlink_dump_start(nlsk, skb, nlh, &c);
- }
- return -EOPNOTSUPP;
-}
-
static int nf_tables_fill_setelem_info(struct sk_buff *skb,
const struct nft_ctx *ctx, u32 seq,
u32 portid, int event, u16 flags,
@@ -3677,6 +3633,135 @@ nla_put_failure:
return -1;
}
+static int nft_setelem_parse_flags(const struct nft_set *set,
+ const struct nlattr *attr, u32 *flags)
+{
+ if (attr == NULL)
+ return 0;
+
+ *flags = ntohl(nla_get_be32(attr));
+ if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+ if (!(set->flags & NFT_SET_INTERVAL) &&
+ *flags & NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr *attr)
+{
+ struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+ const struct nft_set_ext *ext;
+ struct nft_data_desc desc;
+ struct nft_set_elem elem;
+ struct sk_buff *skb;
+ uint32_t flags = 0;
+ void *priv;
+ int err;
+
+ err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
+ nft_set_elem_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!nla[NFTA_SET_ELEM_KEY])
+ return -EINVAL;
+
+ err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+ if (err < 0)
+ return err;
+
+ err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
+ nla[NFTA_SET_ELEM_KEY]);
+ if (err < 0)
+ return err;
+
+ err = -EINVAL;
+ if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
+ return err;
+
+ priv = set->ops->get(ctx->net, set, &elem, flags);
+ if (IS_ERR(priv))
+ return PTR_ERR(priv);
+
+ elem.priv = priv;
+ ext = nft_set_elem_ext(set, &elem);
+
+ err = -ENOMEM;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err1;
+
+ err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid,
+ NFT_MSG_NEWSETELEM, 0, set, &elem);
+ if (err < 0)
+ goto err2;
+
+ err = nfnetlink_unicast(skb, ctx->net, ctx->portid, MSG_DONTWAIT);
+ /* This avoids a loop in nfnetlink. */
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err2:
+ kfree_skb(skb);
+err1:
+ /* this avoids a loop in nfnetlink. */
+ return err == -EAGAIN ? -ENOBUFS : err;
+}
+
+static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_set *set;
+ struct nlattr *attr;
+ struct nft_ctx ctx;
+ int rem, err = 0;
+
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
+ if (err < 0)
+ return err;
+
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_set,
+ .done = nf_tables_dump_set_done,
+ };
+ struct nft_set_dump_ctx *dump_ctx;
+
+ dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
+ if (!dump_ctx)
+ return -ENOMEM;
+
+ dump_ctx->set = set;
+ dump_ctx->ctx = ctx;
+
+ c.data = dump_ctx;
+ return netlink_dump_start(nlsk, skb, nlh, &c);
+ }
+
+ if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
+ return -EINVAL;
+
+ nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ err = nft_get_set_elem(&ctx, set, attr);
+ if (err < 0)
+ break;
+ }
+
+ return err;
+}
+
static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nft_set_elem *elem,
@@ -3777,22 +3862,6 @@ static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
kfree(elem);
}
-static int nft_setelem_parse_flags(const struct nft_set *set,
- const struct nlattr *attr, u32 *flags)
-{
- if (attr == NULL)
- return 0;
-
- *flags = ntohl(nla_get_be32(attr));
- if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
- return -EINVAL;
- if (!(set->flags & NFT_SET_INTERVAL) &&
- *flags & NFT_SET_ELEM_INTERVAL_END)
- return -EINVAL;
-
- return 0;
-}
-
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr, u32 nlmsg_flags)
{
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bd0975d7dd6f..2647b895f4b0 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -312,39 +312,6 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
[NFTA_CT_SREG] = { .type = NLA_U32 },
};
-static int nft_ct_netns_get(struct net *net, uint8_t family)
-{
- int err;
-
- if (family == NFPROTO_INET) {
- err = nf_ct_netns_get(net, NFPROTO_IPV4);
- if (err < 0)
- goto err1;
- err = nf_ct_netns_get(net, NFPROTO_IPV6);
- if (err < 0)
- goto err2;
- } else {
- err = nf_ct_netns_get(net, family);
- if (err < 0)
- goto err1;
- }
- return 0;
-
-err2:
- nf_ct_netns_put(net, NFPROTO_IPV4);
-err1:
- return err;
-}
-
-static void nft_ct_netns_put(struct net *net, uint8_t family)
-{
- if (family == NFPROTO_INET) {
- nf_ct_netns_put(net, NFPROTO_IPV4);
- nf_ct_netns_put(net, NFPROTO_IPV6);
- } else
- nf_ct_netns_put(net, family);
-}
-
#ifdef CONFIG_NF_CONNTRACK_ZONES
static void nft_ct_tmpl_put_pcpu(void)
{
@@ -489,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- err = nft_ct_netns_get(ctx->net, ctx->afi->family);
+ err = nf_ct_netns_get(ctx->net, ctx->afi->family);
if (err < 0)
return err;
@@ -583,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
if (err < 0)
goto err1;
- err = nft_ct_netns_get(ctx->net, ctx->afi->family);
+ err = nf_ct_netns_get(ctx->net, ctx->afi->family);
if (err < 0)
goto err1;
@@ -606,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
struct nft_ct *priv = nft_expr_priv(expr);
__nft_ct_set_destroy(ctx, priv);
- nft_ct_netns_put(ctx->net, ctx->afi->family);
+ nf_ct_netns_put(ctx->net, ctx->afi->family);
}
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 734989c40579..45fb2752fb63 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -106,6 +106,23 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
return NULL;
}
+static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_bitmap_elem *be;
+
+ list_for_each_entry_rcu(be, &priv->list, head) {
+ if (memcmp(nft_set_ext_key(&be->ext), elem->key.val.data, set->klen) ||
+ !nft_set_elem_active(&be->ext, genmask))
+ continue;
+
+ return be;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext)
@@ -294,6 +311,7 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = {
.activate = nft_bitmap_activate,
.lookup = nft_bitmap_lookup,
.walk = nft_bitmap_walk,
+ .get = nft_bitmap_get,
};
static struct nft_set_type nft_bitmap_type __read_mostly = {
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 9c0d5a7ce5f9..f8166c1d5430 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -95,6 +95,24 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
return !!he;
}
+static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he;
+ struct nft_rhash_cmp_arg arg = {
+ .genmask = nft_genmask_cur(net),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+ if (he != NULL)
+ return he;
+
+ return ERR_PTR(-ENOENT);
+}
+
static bool nft_rhash_update(struct nft_set *set, const u32 *key,
void *(*new)(struct nft_set *,
const struct nft_expr *,
@@ -409,6 +427,24 @@ static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
return false;
}
+static void *nft_hash_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_hash_elem *he;
+ u32 hash;
+
+ hash = jhash(elem->key.val.data, set->klen, priv->seed);
+ hash = reciprocal_scale(hash, priv->buckets);
+ hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
+ if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
+ nft_set_elem_active(&he->ext, genmask))
+ return he;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */
static inline u32 nft_hash_key(const u32 *key, u32 klen)
{
@@ -494,7 +530,7 @@ static void *nft_hash_deactivate(const struct net *net,
hash = reciprocal_scale(hash, priv->buckets);
hlist_for_each_entry(he, &priv->table[hash], node) {
if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val,
- set->klen) ||
+ set->klen) &&
nft_set_elem_active(&he->ext, genmask)) {
nft_set_elem_change_active(net, set, &he->ext);
return he;
@@ -600,6 +636,7 @@ static struct nft_set_ops nft_rhash_ops __read_mostly = {
.lookup = nft_rhash_lookup,
.update = nft_rhash_update,
.walk = nft_rhash_walk,
+ .get = nft_rhash_get,
.features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
};
@@ -617,6 +654,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.walk = nft_hash_walk,
+ .get = nft_hash_get,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
};
@@ -634,6 +672,7 @@ static struct nft_set_ops nft_hash_fast_ops __read_mostly = {
.remove = nft_hash_remove,
.lookup = nft_hash_lookup_fast,
.walk = nft_hash_walk,
+ .get = nft_hash_get,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
};
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index d83a4ec5900d..e6f08bc5f359 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -113,6 +113,78 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
return ret;
}
+static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
+ const u32 *key, struct nft_rbtree_elem **elem,
+ unsigned int seq, unsigned int flags, u8 genmask)
+{
+ struct nft_rbtree_elem *rbe, *interval = NULL;
+ struct nft_rbtree *priv = nft_set_priv(set);
+ const struct rb_node *parent;
+ const void *this;
+ int d;
+
+ parent = rcu_dereference_raw(priv->root.rb_node);
+ while (parent != NULL) {
+ if (read_seqcount_retry(&priv->count, seq))
+ return false;
+
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+ this = nft_set_ext_key(&rbe->ext);
+ d = memcmp(this, key, set->klen);
+ if (d < 0) {
+ parent = rcu_dereference_raw(parent->rb_left);
+ interval = rbe;
+ } else if (d > 0) {
+ parent = rcu_dereference_raw(parent->rb_right);
+ } else {
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ parent = rcu_dereference_raw(parent->rb_left);
+
+ if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
+ (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
+ (flags & NFT_SET_ELEM_INTERVAL_END)) {
+ *elem = rbe;
+ return true;
+ }
+ return false;
+ }
+ }
+
+ if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+ nft_set_elem_active(&interval->ext, genmask) &&
+ !nft_rbtree_interval_end(interval)) {
+ *elem = interval;
+ return true;
+ }
+
+ return false;
+}
+
+static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ unsigned int seq = read_seqcount_begin(&priv->count);
+ struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT);
+ const u32 *key = (const u32 *)&elem->key.val;
+ u8 genmask = nft_genmask_cur(net);
+ bool ret;
+
+ ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
+ if (ret || !read_seqcount_retry(&priv->count, seq))
+ return rbe;
+
+ read_lock_bh(&priv->lock);
+ seq = read_seqcount_begin(&priv->count);
+ ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
+ if (!ret)
+ rbe = ERR_PTR(-ENOENT);
+ read_unlock_bh(&priv->lock);
+
+ return rbe;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
@@ -336,6 +408,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
+ .get = nft_rbtree_get,
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
};
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d8571f414208..a77dd514297c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1153,6 +1153,7 @@ xt_replace_table(struct xt_table *table,
int *error)
{
struct xt_table_info *private;
+ unsigned int cpu;
int ret;
ret = xt_jumpstack_alloc(newinfo);
@@ -1182,14 +1183,28 @@ xt_replace_table(struct xt_table *table,
smp_wmb();
table->private = newinfo;
+ /* make sure all cpus see new ->private value */
+ smp_wmb();
+
/*
* Even though table entries have now been swapped, other CPU's
- * may still be using the old entries. This is okay, because
- * resynchronization happens because of the locking done
- * during the get_counters() routine.
+ * may still be using the old entries...
*/
local_bh_enable();
+ /* ... so wait for even xt_recseq on all cpus */
+ for_each_possible_cpu(cpu) {
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
+ u32 seq = raw_read_seqcount(s);
+
+ if (seq & 1) {
+ do {
+ cond_resched();
+ cpu_relax();
+ } while (seq == raw_read_seqcount(s));
+ }
+ }
+
#ifdef CONFIG_AUDIT
if (audit_enabled) {
audit_log(current->audit_context, GFP_KERNEL,
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index ffa8eec980e9..a6214f235333 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -46,7 +46,6 @@
struct xt_connlimit_conn {
struct hlist_node node;
struct nf_conntrack_tuple tuple;
- union nf_inet_addr addr;
};
struct xt_connlimit_rb {
@@ -72,16 +71,9 @@ static inline unsigned int connlimit_iphash(__be32 addr)
}
static inline unsigned int
-connlimit_iphash6(const union nf_inet_addr *addr,
- const union nf_inet_addr *mask)
+connlimit_iphash6(const union nf_inet_addr *addr)
{
- union nf_inet_addr res;
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
- res.ip6[i] = addr->ip6[i] & mask->ip6[i];
-
- return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6),
+ return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6),
connlimit_rnd) % CONNLIMIT_SLOTS;
}
@@ -95,24 +87,13 @@ static inline bool already_closed(const struct nf_conn *conn)
}
static int
-same_source_net(const union nf_inet_addr *addr,
- const union nf_inet_addr *mask,
- const union nf_inet_addr *u3, u_int8_t family)
+same_source(const union nf_inet_addr *addr,
+ const union nf_inet_addr *u3, u_int8_t family)
{
- if (family == NFPROTO_IPV4) {
- return ntohl(addr->ip & mask->ip) -
- ntohl(u3->ip & mask->ip);
- } else {
- union nf_inet_addr lh, rh;
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) {
- lh.ip6[i] = addr->ip6[i] & mask->ip6[i];
- rh.ip6[i] = u3->ip6[i] & mask->ip6[i];
- }
+ if (family == NFPROTO_IPV4)
+ return ntohl(addr->ip) - ntohl(u3->ip);
- return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6));
- }
+ return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6));
}
static bool add_hlist(struct hlist_head *head,
@@ -125,7 +106,6 @@ static bool add_hlist(struct hlist_head *head,
if (conn == NULL)
return false;
conn->tuple = *tuple;
- conn->addr = *addr;
hlist_add_head(&conn->node, head);
return true;
}
@@ -196,7 +176,7 @@ static void tree_nodes_free(struct rb_root *root,
static unsigned int
count_tree(struct net *net, struct rb_root *root,
const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr, const union nf_inet_addr *mask,
+ const union nf_inet_addr *addr,
u8 family, const struct nf_conntrack_zone *zone)
{
struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
@@ -217,7 +197,7 @@ count_tree(struct net *net, struct rb_root *root,
rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node);
parent = *rbnode;
- diff = same_source_net(addr, mask, &rbconn->addr, family);
+ diff = same_source(addr, &rbconn->addr, family);
if (diff < 0) {
rbnode = &((*rbnode)->rb_left);
} else if (diff > 0) {
@@ -270,7 +250,6 @@ count_tree(struct net *net, struct rb_root *root,
}
conn->tuple = *tuple;
- conn->addr = *addr;
rbconn->addr = *addr;
INIT_HLIST_HEAD(&rbconn->hhead);
@@ -285,7 +264,6 @@ static int count_them(struct net *net,
struct xt_connlimit_data *data,
const struct nf_conntrack_tuple *tuple,
const union nf_inet_addr *addr,
- const union nf_inet_addr *mask,
u_int8_t family,
const struct nf_conntrack_zone *zone)
{
@@ -294,14 +272,14 @@ static int count_them(struct net *net,
u32 hash;
if (family == NFPROTO_IPV6)
- hash = connlimit_iphash6(addr, mask);
+ hash = connlimit_iphash6(addr);
else
- hash = connlimit_iphash(addr->ip & mask->ip);
+ hash = connlimit_iphash(addr->ip);
root = &data->climit_root[hash];
spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
- count = count_tree(net, root, tuple, addr, mask, family, zone);
+ count = count_tree(net, root, tuple, addr, family, zone);
spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
@@ -332,16 +310,23 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (xt_family(par) == NFPROTO_IPV6) {
const struct ipv6hdr *iph = ipv6_hdr(skb);
+ unsigned int i;
+
memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
&iph->daddr : &iph->saddr, sizeof(addr.ip6));
+
+ for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i)
+ addr.ip6[i] &= info->mask.ip6[i];
} else {
const struct iphdr *iph = ip_hdr(skb);
addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
iph->daddr : iph->saddr;
+
+ addr.ip &= info->mask.ip;
}
connections = count_them(net, info->data, tuple_ptr, &addr,
- &info->mask, xt_family(par), zone);
+ xt_family(par), zone);
if (connections == 0)
/* kmalloc failed, drop it entirely */
goto hotdrop;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 26ded4239611..a3eab903a9cd 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2136,7 +2136,7 @@ static int netlink_dump(struct sock *sk)
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
struct module *module;
- int len, err = -ENOBUFS;
+ int err = -ENOBUFS;
int alloc_min_size;
int alloc_size;
@@ -2183,9 +2183,11 @@ static int netlink_dump(struct sock *sk)
skb_reserve(skb, skb_tailroom(skb) - alloc_size);
netlink_skb_set_owner_r(skb, sk);
- len = cb->dump(skb, cb);
+ if (nlk->dump_done_errno > 0)
+ nlk->dump_done_errno = cb->dump(skb, cb);
- if (len > 0) {
+ if (nlk->dump_done_errno > 0 ||
+ skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
mutex_unlock(nlk->cb_mutex);
if (sk_filter(sk, skb))
@@ -2195,13 +2197,15 @@ static int netlink_dump(struct sock *sk)
return 0;
}
- nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
- if (!nlh)
+ nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
+ sizeof(nlk->dump_done_errno), NLM_F_MULTI);
+ if (WARN_ON(!nlh))
goto errout_skb;
nl_dump_check_consistent(cb, nlh);
- memcpy(nlmsg_data(nlh), &len, sizeof(len));
+ memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
+ sizeof(nlk->dump_done_errno));
if (sk_filter(sk, skb))
kfree_skb(skb);
@@ -2273,6 +2277,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
}
nlk->cb_running = true;
+ nlk->dump_done_errno = INT_MAX;
mutex_unlock(nlk->cb_mutex);
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 028188597eaa..962de7b3c023 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -34,6 +34,7 @@ struct netlink_sock {
wait_queue_head_t wait;
bool bound;
bool cb_running;
+ int dump_done_errno;
struct netlink_callback cb;
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 58fb827439a8..d7da99a0b0b8 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -14,6 +14,66 @@
#include <net/nsh.h>
#include <net/tun_proto.h>
+int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh)
+{
+ struct nshhdr *nh;
+ size_t length = nsh_hdr_len(pushed_nh);
+ u8 next_proto;
+
+ if (skb->mac_len) {
+ next_proto = TUN_P_ETHERNET;
+ } else {
+ next_proto = tun_p_from_eth_p(skb->protocol);
+ if (!next_proto)
+ return -EAFNOSUPPORT;
+ }
+
+ /* Add the NSH header */
+ if (skb_cow_head(skb, length) < 0)
+ return -ENOMEM;
+
+ skb_push(skb, length);
+ nh = (struct nshhdr *)(skb->data);
+ memcpy(nh, pushed_nh, length);
+ nh->np = next_proto;
+ skb_postpush_rcsum(skb, nh, length);
+
+ skb->protocol = htons(ETH_P_NSH);
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_mac_len(skb);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nsh_push);
+
+int nsh_pop(struct sk_buff *skb)
+{
+ struct nshhdr *nh;
+ size_t length;
+ __be16 inner_proto;
+
+ if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN))
+ return -ENOMEM;
+ nh = (struct nshhdr *)(skb->data);
+ length = nsh_hdr_len(nh);
+ inner_proto = tun_p_to_eth_p(nh->np);
+ if (!pskb_may_pull(skb, length))
+ return -ENOMEM;
+
+ if (!inner_proto)
+ return -EAFNOSUPPORT;
+
+ skb_pull_rcsum(skb, length);
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_mac_len(skb);
+ skb->protocol = inner_proto;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nsh_pop);
+
static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index ce947292ae77..2650205cdaf9 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -14,6 +14,7 @@ config OPENVSWITCH
select MPLS
select NET_MPLS_GSO
select DST_CACHE
+ select NET_NSH
---help---
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 299f4476cf44..41109c326f3a 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -12,6 +12,7 @@ openvswitch-y := \
flow.o \
flow_netlink.o \
flow_table.o \
+ meter.o \
vport.o \
vport-internal_dev.o \
vport-netdev.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index a551232daf61..30a5df27116e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -43,6 +43,7 @@
#include "flow.h"
#include "conntrack.h"
#include "vport.h"
+#include "flow_netlink.h"
struct deferred_action {
struct sk_buff *skb;
@@ -380,6 +381,38 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
return 0;
}
+static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct nshhdr *nh)
+{
+ int err;
+
+ err = nsh_push(skb, nh);
+ if (err)
+ return err;
+
+ /* safe right before invalidate_flow_key */
+ key->mac_proto = MAC_PROTO_NONE;
+ invalidate_flow_key(key);
+ return 0;
+}
+
+static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ int err;
+
+ err = nsh_pop(skb);
+ if (err)
+ return err;
+
+ /* safe right before invalidate_flow_key */
+ if (skb->protocol == htons(ETH_P_TEB))
+ key->mac_proto = MAC_PROTO_ETHERNET;
+ else
+ key->mac_proto = MAC_PROTO_NONE;
+ invalidate_flow_key(key);
+ return 0;
+}
+
static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
__be32 addr, __be32 new_addr)
{
@@ -602,6 +635,69 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
+static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct nlattr *a)
+{
+ struct nshhdr *nh;
+ size_t length;
+ int err;
+ u8 flags;
+ u8 ttl;
+ int i;
+
+ struct ovs_key_nsh key;
+ struct ovs_key_nsh mask;
+
+ err = nsh_key_from_nlattr(a, &key, &mask);
+ if (err)
+ return err;
+
+ /* Make sure the NSH base header is there */
+ if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN))
+ return -ENOMEM;
+
+ nh = nsh_hdr(skb);
+ length = nsh_hdr_len(nh);
+
+ /* Make sure the whole NSH header is there */
+ err = skb_ensure_writable(skb, skb_network_offset(skb) +
+ length);
+ if (unlikely(err))
+ return err;
+
+ nh = nsh_hdr(skb);
+ skb_postpull_rcsum(skb, nh, length);
+ flags = nsh_get_flags(nh);
+ flags = OVS_MASKED(flags, key.base.flags, mask.base.flags);
+ flow_key->nsh.base.flags = flags;
+ ttl = nsh_get_ttl(nh);
+ ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl);
+ flow_key->nsh.base.ttl = ttl;
+ nsh_set_flags_and_ttl(nh, flags, ttl);
+ nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr,
+ mask.base.path_hdr);
+ flow_key->nsh.base.path_hdr = nh->path_hdr;
+ switch (nh->mdtype) {
+ case NSH_M_TYPE1:
+ for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
+ nh->md1.context[i] =
+ OVS_MASKED(nh->md1.context[i], key.context[i],
+ mask.context[i]);
+ }
+ memcpy(flow_key->nsh.context, nh->md1.context,
+ sizeof(nh->md1.context));
+ break;
+ case NSH_M_TYPE2:
+ memset(flow_key->nsh.context, 0,
+ sizeof(flow_key->nsh.context));
+ break;
+ default:
+ return -EINVAL;
+ }
+ skb_postpush_rcsum(skb, nh, length);
+ return 0;
+}
+
/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
@@ -1024,6 +1120,10 @@ static int execute_masked_set_action(struct sk_buff *skb,
get_mask(a, struct ovs_key_ethernet *));
break;
+ case OVS_KEY_ATTR_NSH:
+ err = set_nsh(skb, flow_key, a);
+ break;
+
case OVS_KEY_ATTR_IPV4:
err = set_ipv4(skb, flow_key, nla_data(a),
get_mask(a, struct ovs_key_ipv4 *));
@@ -1214,6 +1314,28 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_POP_ETH:
err = pop_eth(skb, key);
break;
+
+ case OVS_ACTION_ATTR_PUSH_NSH: {
+ u8 buffer[NSH_HDR_MAX_LEN];
+ struct nshhdr *nh = (struct nshhdr *)buffer;
+
+ err = nsh_hdr_from_nlattr(nla_data(a), nh,
+ NSH_HDR_MAX_LEN);
+ if (unlikely(err))
+ break;
+ err = push_nsh(skb, key, nh);
+ break;
+ }
+
+ case OVS_ACTION_ATTR_POP_NSH:
+ err = pop_nsh(skb, key);
+ break;
+
+ case OVS_ACTION_ATTR_METER:
+ if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
+ consume_skb(skb);
+ return 0;
+ }
}
if (unlikely(err)) {
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 4d38ac044cee..0dab33fb9844 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -55,6 +55,7 @@
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
+#include "meter.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
@@ -142,35 +143,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
const struct dp_upcall_info *,
uint32_t cutlen);
-/* Must be called with rcu_read_lock. */
-static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
-{
- struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
-
- if (dev) {
- struct vport *vport = ovs_internal_dev_get_vport(dev);
- if (vport)
- return vport->dp;
- }
-
- return NULL;
-}
-
-/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
- * returned dp pointer valid.
- */
-static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
-{
- struct datapath *dp;
-
- WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
- rcu_read_lock();
- dp = get_dp_rcu(net, dp_ifindex);
- rcu_read_unlock();
-
- return dp;
-}
-
/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
@@ -203,6 +175,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
ovs_flow_tbl_destroy(&dp->table);
free_percpu(dp->stats_percpu);
kfree(dp->ports);
+ ovs_meters_exit(dp);
kfree(dp);
}
@@ -1601,6 +1574,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&dp->ports[i]);
+ err = ovs_meters_init(dp);
+ if (err)
+ goto err_destroy_ports_array;
+
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
parms.type = OVS_VPORT_TYPE_INTERNAL;
@@ -1629,7 +1606,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_reset_user_features(skb, info);
}
- goto err_destroy_ports_array;
+ goto err_destroy_meters;
}
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
@@ -1644,8 +1621,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-err_destroy_ports_array:
+err_destroy_meters:
ovs_unlock();
+ ovs_meters_exit(dp);
+err_destroy_ports_array:
kfree(dp->ports);
err_destroy_percpu:
free_percpu(dp->stats_percpu);
@@ -2294,6 +2273,7 @@ static struct genl_family * const dp_genl_families[] = {
&dp_vport_genl_family,
&dp_flow_genl_family,
&dp_packet_genl_family,
+ &dp_meter_genl_family,
};
static void dp_unregister_genl(int n_families)
@@ -2474,3 +2454,4 @@ MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 4a104ef9e12c..523d65526766 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -30,6 +30,8 @@
#include "conntrack.h"
#include "flow.h"
#include "flow_table.h"
+#include "meter.h"
+#include "vport-internal_dev.h"
#define DP_MAX_PORTS USHRT_MAX
#define DP_VPORT_HASH_BUCKETS 1024
@@ -91,6 +93,9 @@ struct datapath {
u32 user_features;
u32 max_headroom;
+
+ /* Switch meters. */
+ struct hlist_head *meters;
};
/**
@@ -190,6 +195,36 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n
return ovs_lookup_vport(dp, port_no);
}
+/* Must be called with rcu_read_lock. */
+static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
+{
+ struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
+
+ if (dev) {
+ struct vport *vport = ovs_internal_dev_get_vport(dev);
+
+ if (vport)
+ return vport->dp;
+ }
+
+ return NULL;
+}
+
+/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
+ * returned dp pointer valid.
+ */
+static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
+{
+ struct datapath *dp;
+
+ WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+ rcu_read_lock();
+ dp = get_dp_rcu(net, dp_ifindex);
+ rcu_read_unlock();
+
+ return dp;
+}
+
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_family dp_vport_genl_family;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 8c94cef25a72..864ddb1e3642 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -46,6 +46,7 @@
#include <net/ipv6.h>
#include <net/mpls.h>
#include <net/ndisc.h>
+#include <net/nsh.h>
#include "conntrack.h"
#include "datapath.h"
@@ -490,6 +491,52 @@ invalid:
return 0;
}
+static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ struct nshhdr *nh;
+ unsigned int nh_ofs = skb_network_offset(skb);
+ u8 version, length;
+ int err;
+
+ err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
+ if (unlikely(err))
+ return err;
+
+ nh = nsh_hdr(skb);
+ version = nsh_get_ver(nh);
+ length = nsh_hdr_len(nh);
+
+ if (version != 0)
+ return -EINVAL;
+
+ err = check_header(skb, nh_ofs + length);
+ if (unlikely(err))
+ return err;
+
+ nh = nsh_hdr(skb);
+ key->nsh.base.flags = nsh_get_flags(nh);
+ key->nsh.base.ttl = nsh_get_ttl(nh);
+ key->nsh.base.mdtype = nh->mdtype;
+ key->nsh.base.np = nh->np;
+ key->nsh.base.path_hdr = nh->path_hdr;
+ switch (key->nsh.base.mdtype) {
+ case NSH_M_TYPE1:
+ if (length != NSH_M_TYPE1_LEN)
+ return -EINVAL;
+ memcpy(key->nsh.context, nh->md1.context,
+ sizeof(nh->md1));
+ break;
+ case NSH_M_TYPE2:
+ memset(key->nsh.context, 0,
+ sizeof(nh->md1));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/**
* key_extract - extracts a flow key from an Ethernet frame.
* @skb: sk_buff that contains the frame, with skb->data pointing to the
@@ -735,6 +782,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->tp, 0, sizeof(key->tp));
}
}
+ } else if (key->eth.type == htons(ETH_P_NSH)) {
+ error = parse_nsh(skb, key);
+ if (error)
+ return error;
}
return 0;
}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 1875bba4f865..c670dd24b8b7 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -35,6 +35,7 @@
#include <net/inet_ecn.h>
#include <net/ip_tunnels.h>
#include <net/dst_metadata.h>
+#include <net/nsh.h>
struct sk_buff;
@@ -66,6 +67,11 @@ struct vlan_head {
(offsetof(struct sw_flow_key, recirc_id) + \
FIELD_SIZEOF(struct sw_flow_key, recirc_id))
+struct ovs_key_nsh {
+ struct ovs_nsh_key_base base;
+ __be32 context[NSH_MD1_CONTEXT_SIZE];
+};
+
struct sw_flow_key {
u8 tun_opts[IP_TUNNEL_OPTS_MAX];
u8 tun_opts_len;
@@ -143,6 +149,7 @@ struct sw_flow_key {
} nd;
};
} ipv6;
+ struct ovs_key_nsh nsh; /* network service header */
};
struct {
/* Connection tracking fields not packed above. */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index dc0d79092e74..bb4dae198c78 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,7 @@
#include <net/ndisc.h>
#include <net/mpls.h>
#include <net/vxlan.h>
+#include <net/tun_proto.h>
#include <net/erspan.h>
#include "flow_netlink.h"
@@ -80,13 +81,16 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_HASH:
case OVS_ACTION_ATTR_POP_ETH:
case OVS_ACTION_ATTR_POP_MPLS:
+ case OVS_ACTION_ATTR_POP_NSH:
case OVS_ACTION_ATTR_POP_VLAN:
case OVS_ACTION_ATTR_PUSH_ETH:
case OVS_ACTION_ATTR_PUSH_MPLS:
+ case OVS_ACTION_ATTR_PUSH_NSH:
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_SAMPLE:
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SET_MASKED:
+ case OVS_ACTION_ATTR_METER:
default:
return true;
}
@@ -175,7 +179,8 @@ static bool match_validate(const struct sw_flow_match *match,
| (1 << OVS_KEY_ATTR_ICMPV6)
| (1 << OVS_KEY_ATTR_ARP)
| (1 << OVS_KEY_ATTR_ND)
- | (1 << OVS_KEY_ATTR_MPLS));
+ | (1 << OVS_KEY_ATTR_MPLS)
+ | (1 << OVS_KEY_ATTR_NSH));
/* Always allowed mask fields. */
mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
@@ -284,6 +289,14 @@ static bool match_validate(const struct sw_flow_match *match,
}
}
+ if (match->key->eth.type == htons(ETH_P_NSH)) {
+ key_expected |= 1 << OVS_KEY_ATTR_NSH;
+ if (match->mask &&
+ match->mask->key.eth.type == htons(0xffff)) {
+ mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
+ }
+ }
+
if ((key_attrs & key_expected) != key_expected) {
/* Key attributes check failed. */
OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
@@ -325,12 +338,25 @@ size_t ovs_tun_key_attr_size(void)
+ nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
}
+size_t ovs_nsh_key_attr_size(void)
+{
+ /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
+ * updating this function.
+ */
+ return nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
+ /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
+ * mutually exclusive, so the bigger one can cover
+ * the small one.
+ */
+ + nla_total_size(NSH_CTX_HDRS_MAX_LEN);
+}
+
size_t ovs_key_attr_size(void)
{
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
- BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28);
+ BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -344,6 +370,8 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
+ nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
+ nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
+ + nla_total_size(0) /* OVS_KEY_ATTR_NSH */
+ + ovs_nsh_key_attr_size()
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
@@ -377,6 +405,13 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
[OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) },
};
+static const struct ovs_len_tbl
+ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
+ [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
+ [OVS_NSH_KEY_ATTR_MD1] = { .len = sizeof(struct ovs_nsh_key_md1) },
+ [OVS_NSH_KEY_ATTR_MD2] = { .len = OVS_ATTR_VARIABLE },
+};
+
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED },
@@ -409,6 +444,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
.len = sizeof(struct ovs_key_ct_tuple_ipv4) },
[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
+ [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED,
+ .next = ovs_nsh_key_attr_lens, },
};
static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -1227,6 +1264,221 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
return 0;
}
+int nsh_hdr_from_nlattr(const struct nlattr *attr,
+ struct nshhdr *nh, size_t size)
+{
+ struct nlattr *a;
+ int rem;
+ u8 flags = 0;
+ u8 ttl = 0;
+ int mdlen = 0;
+
+ /* validate_nsh has check this, so we needn't do duplicate check here
+ */
+ if (size < NSH_BASE_HDR_LEN)
+ return -ENOBUFS;
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+
+ switch (type) {
+ case OVS_NSH_KEY_ATTR_BASE: {
+ const struct ovs_nsh_key_base *base = nla_data(a);
+
+ flags = base->flags;
+ ttl = base->ttl;
+ nh->np = base->np;
+ nh->mdtype = base->mdtype;
+ nh->path_hdr = base->path_hdr;
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD1:
+ mdlen = nla_len(a);
+ if (mdlen > size - NSH_BASE_HDR_LEN)
+ return -ENOBUFS;
+ memcpy(&nh->md1, nla_data(a), mdlen);
+ break;
+
+ case OVS_NSH_KEY_ATTR_MD2:
+ mdlen = nla_len(a);
+ if (mdlen > size - NSH_BASE_HDR_LEN)
+ return -ENOBUFS;
+ memcpy(&nh->md2, nla_data(a), mdlen);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ }
+
+ /* nsh header length = NSH_BASE_HDR_LEN + mdlen */
+ nh->ver_flags_ttl_len = 0;
+ nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
+
+ return 0;
+}
+
+int nsh_key_from_nlattr(const struct nlattr *attr,
+ struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
+{
+ struct nlattr *a;
+ int rem;
+
+ /* validate_nsh has check this, so we needn't do duplicate check here
+ */
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+
+ switch (type) {
+ case OVS_NSH_KEY_ATTR_BASE: {
+ const struct ovs_nsh_key_base *base = nla_data(a);
+ const struct ovs_nsh_key_base *base_mask = base + 1;
+
+ nsh->base = *base;
+ nsh_mask->base = *base_mask;
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD1: {
+ const struct ovs_nsh_key_md1 *md1 = nla_data(a);
+ const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
+
+ memcpy(nsh->context, md1->context, sizeof(*md1));
+ memcpy(nsh_mask->context, md1_mask->context,
+ sizeof(*md1_mask));
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD2:
+ /* Not supported yet */
+ return -ENOTSUPP;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int nsh_key_put_from_nlattr(const struct nlattr *attr,
+ struct sw_flow_match *match, bool is_mask,
+ bool is_push_nsh, bool log)
+{
+ struct nlattr *a;
+ int rem;
+ bool has_base = false;
+ bool has_md1 = false;
+ bool has_md2 = false;
+ u8 mdtype = 0;
+ int mdlen = 0;
+
+ if (WARN_ON(is_push_nsh && is_mask))
+ return -EINVAL;
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+ int i;
+
+ if (type > OVS_NSH_KEY_ATTR_MAX) {
+ OVS_NLERR(log, "nsh attr %d is out of range max %d",
+ type, OVS_NSH_KEY_ATTR_MAX);
+ return -EINVAL;
+ }
+
+ if (!check_attr_len(nla_len(a),
+ ovs_nsh_key_attr_lens[type].len)) {
+ OVS_NLERR(
+ log,
+ "nsh attr %d has unexpected len %d expected %d",
+ type,
+ nla_len(a),
+ ovs_nsh_key_attr_lens[type].len
+ );
+ return -EINVAL;
+ }
+
+ switch (type) {
+ case OVS_NSH_KEY_ATTR_BASE: {
+ const struct ovs_nsh_key_base *base = nla_data(a);
+
+ has_base = true;
+ mdtype = base->mdtype;
+ SW_FLOW_KEY_PUT(match, nsh.base.flags,
+ base->flags, is_mask);
+ SW_FLOW_KEY_PUT(match, nsh.base.ttl,
+ base->ttl, is_mask);
+ SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
+ base->mdtype, is_mask);
+ SW_FLOW_KEY_PUT(match, nsh.base.np,
+ base->np, is_mask);
+ SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
+ base->path_hdr, is_mask);
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD1: {
+ const struct ovs_nsh_key_md1 *md1 = nla_data(a);
+
+ has_md1 = true;
+ for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
+ SW_FLOW_KEY_PUT(match, nsh.context[i],
+ md1->context[i], is_mask);
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD2:
+ if (!is_push_nsh) /* Not supported MD type 2 yet */
+ return -ENOTSUPP;
+
+ has_md2 = true;
+ mdlen = nla_len(a);
+ if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
+ OVS_NLERR(
+ log,
+ "Invalid MD length %d for MD type %d",
+ mdlen,
+ mdtype
+ );
+ return -EINVAL;
+ }
+ break;
+ default:
+ OVS_NLERR(log, "Unknown nsh attribute %d",
+ type);
+ return -EINVAL;
+ }
+ }
+
+ if (rem > 0) {
+ OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
+ return -EINVAL;
+ }
+
+ if (has_md1 && has_md2) {
+ OVS_NLERR(
+ 1,
+ "invalid nsh attribute: md1 and md2 are exclusive."
+ );
+ return -EINVAL;
+ }
+
+ if (!is_mask) {
+ if ((has_md1 && mdtype != NSH_M_TYPE1) ||
+ (has_md2 && mdtype != NSH_M_TYPE2)) {
+ OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
+ mdtype);
+ return -EINVAL;
+ }
+
+ if (is_push_nsh &&
+ (!has_base || (!has_md1 && !has_md2))) {
+ OVS_NLERR(
+ 1,
+ "push_nsh: missing base or metadata attributes"
+ );
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
u64 attrs, const struct nlattr **a,
bool is_mask, bool log)
@@ -1354,6 +1606,13 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
attrs &= ~(1 << OVS_KEY_ATTR_ARP);
}
+ if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
+ if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
+ is_mask, false, log) < 0)
+ return -EINVAL;
+ attrs &= ~(1 << OVS_KEY_ATTR_NSH);
+ }
+
if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
const struct ovs_key_mpls *mpls_key;
@@ -1670,6 +1929,34 @@ static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
return 0;
}
+static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
+ struct sk_buff *skb)
+{
+ struct nlattr *start;
+
+ start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
+ if (!start)
+ return -EMSGSIZE;
+
+ if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
+ goto nla_put_failure;
+
+ if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
+ if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
+ sizeof(nsh->context), nsh->context))
+ goto nla_put_failure;
+ }
+
+ /* Don't support MD type 2 yet */
+
+ nla_nest_end(skb, start);
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
const struct sw_flow_key *output, bool is_mask,
struct sk_buff *skb)
@@ -1798,6 +2085,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
ipv6_key->ipv6_tclass = output->ip.tos;
ipv6_key->ipv6_hlimit = output->ip.ttl;
ipv6_key->ipv6_frag = output->ip.frag;
+ } else if (swkey->eth.type == htons(ETH_P_NSH)) {
+ if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
+ goto nla_put_failure;
} else if (swkey->eth.type == htons(ETH_P_ARP) ||
swkey->eth.type == htons(ETH_P_RARP)) {
struct ovs_key_arp *arp_key;
@@ -2292,6 +2582,19 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
return err;
}
+static bool validate_nsh(const struct nlattr *attr, bool is_mask,
+ bool is_push_nsh, bool log)
+{
+ struct sw_flow_match match;
+ struct sw_flow_key key;
+ int ret = 0;
+
+ ovs_match_init(&match, &key, true, NULL);
+ ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
+ is_push_nsh, log);
+ return !ret;
+}
+
/* Return false if there are any non-masked bits set.
* Mask follows data immediately, before any netlink padding.
*/
@@ -2434,6 +2737,13 @@ static int validate_set(const struct nlattr *a,
break;
+ case OVS_KEY_ATTR_NSH:
+ if (eth_type != htons(ETH_P_NSH))
+ return -EINVAL;
+ if (!validate_nsh(nla_data(a), masked, false, log))
+ return -EINVAL;
+ break;
+
default:
return -EINVAL;
}
@@ -2533,6 +2843,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
[OVS_ACTION_ATTR_POP_ETH] = 0,
+ [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
+ [OVS_ACTION_ATTR_POP_NSH] = 0,
+ [OVS_ACTION_ATTR_METER] = sizeof(u32),
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -2690,6 +3003,38 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
mac_proto = MAC_PROTO_ETHERNET;
break;
+ case OVS_ACTION_ATTR_PUSH_NSH:
+ if (mac_proto != MAC_PROTO_ETHERNET) {
+ u8 next_proto;
+
+ next_proto = tun_p_from_eth_p(eth_type);
+ if (!next_proto)
+ return -EINVAL;
+ }
+ mac_proto = MAC_PROTO_NONE;
+ if (!validate_nsh(nla_data(a), false, true, true))
+ return -EINVAL;
+ break;
+
+ case OVS_ACTION_ATTR_POP_NSH: {
+ __be16 inner_proto;
+
+ if (eth_type != htons(ETH_P_NSH))
+ return -EINVAL;
+ inner_proto = tun_p_to_eth_p(key->nsh.base.np);
+ if (!inner_proto)
+ return -EINVAL;
+ if (key->nsh.base.np == TUN_P_ETHERNET)
+ mac_proto = MAC_PROTO_ETHERNET;
+ else
+ mac_proto = MAC_PROTO_NONE;
+ break;
+ }
+
+ case OVS_ACTION_ATTR_METER:
+ /* Non-existent meters are simply ignored. */
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 929c665ac3aa..6657606b2b47 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -79,4 +79,9 @@ int ovs_nla_put_actions(const struct nlattr *attr,
void ovs_nla_free_flow_actions(struct sw_flow_actions *);
void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *);
+int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh,
+ struct ovs_key_nsh *nsh_mask);
+int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh,
+ size_t size);
+
#endif /* flow_netlink.h */
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
new file mode 100644
index 000000000000..2a5ba356c472
--- /dev/null
+++ b/net/openvswitch/meter.c
@@ -0,0 +1,604 @@
+/*
+ * Copyright (c) 2017 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/openvswitch.h>
+#include <linux/netlink.h>
+#include <linux/rculist.h>
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "datapath.h"
+#include "meter.h"
+
+#define METER_HASH_BUCKETS 1024
+
+static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
+ [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
+ [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
+ [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
+ [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
+ [OVS_METER_ATTR_USED] = { .type = NLA_U64 },
+ [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
+ [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
+ [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
+ [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
+ [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
+ [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
+ [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
+};
+
+static void rcu_free_ovs_meter_callback(struct rcu_head *rcu)
+{
+ struct dp_meter *meter = container_of(rcu, struct dp_meter, rcu);
+
+ kfree(meter);
+}
+
+static void ovs_meter_free(struct dp_meter *meter)
+{
+ if (!meter)
+ return;
+
+ call_rcu(&meter->rcu, rcu_free_ovs_meter_callback);
+}
+
+static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
+ u32 meter_id)
+{
+ return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
+}
+
+/* Call with ovs_mutex or RCU read lock. */
+static struct dp_meter *lookup_meter(const struct datapath *dp,
+ u32 meter_id)
+{
+ struct dp_meter *meter;
+ struct hlist_head *head;
+
+ head = meter_hash_bucket(dp, meter_id);
+ hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
+ if (meter->id == meter_id)
+ return meter;
+ }
+ return NULL;
+}
+
+static void attach_meter(struct datapath *dp, struct dp_meter *meter)
+{
+ struct hlist_head *head = meter_hash_bucket(dp, meter->id);
+
+ hlist_add_head_rcu(&meter->dp_hash_node, head);
+}
+
+static void detach_meter(struct dp_meter *meter)
+{
+ ASSERT_OVSL();
+ if (meter)
+ hlist_del_rcu(&meter->dp_hash_node);
+}
+
+static struct sk_buff *
+ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
+ struct ovs_header **ovs_reply_header)
+{
+ struct sk_buff *skb;
+ struct ovs_header *ovs_header = info->userhdr;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ *ovs_reply_header = genlmsg_put(skb, info->snd_portid,
+ info->snd_seq,
+ &dp_meter_genl_family, 0, cmd);
+ if (!ovs_reply_header) {
+ nlmsg_free(skb);
+ return ERR_PTR(-EMSGSIZE);
+ }
+ (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
+
+ return skb;
+}
+
+static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
+ struct dp_meter *meter)
+{
+ struct nlattr *nla;
+ struct dp_meter_band *band;
+ u16 i;
+
+ if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
+ goto error;
+
+ if (!meter)
+ return 0;
+
+ if (nla_put(reply, OVS_METER_ATTR_STATS,
+ sizeof(struct ovs_flow_stats), &meter->stats) ||
+ nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
+ OVS_METER_ATTR_PAD))
+ goto error;
+
+ nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
+ if (!nla)
+ goto error;
+
+ band = meter->bands;
+
+ for (i = 0; i < meter->n_bands; ++i, ++band) {
+ struct nlattr *band_nla;
+
+ band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
+ if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
+ sizeof(struct ovs_flow_stats),
+ &band->stats))
+ goto error;
+ nla_nest_end(reply, band_nla);
+ }
+ nla_nest_end(reply, nla);
+
+ return 0;
+error:
+ return -EMSGSIZE;
+}
+
+static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *reply;
+ struct ovs_header *ovs_reply_header;
+ struct nlattr *nla, *band_nla;
+ int err;
+
+ reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
+ &ovs_reply_header);
+ if (!reply)
+ return PTR_ERR(reply);
+
+ if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
+ nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
+ goto nla_put_failure;
+
+ nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
+ if (!nla)
+ goto nla_put_failure;
+
+ band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
+ if (!band_nla)
+ goto nla_put_failure;
+ /* Currently only DROP band type is supported. */
+ if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
+ goto nla_put_failure;
+ nla_nest_end(reply, band_nla);
+ nla_nest_end(reply, nla);
+
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
+nla_put_failure:
+ nlmsg_free(reply);
+ err = -EMSGSIZE;
+ return err;
+}
+
+static struct dp_meter *dp_meter_create(struct nlattr **a)
+{
+ struct nlattr *nla;
+ int rem;
+ u16 n_bands = 0;
+ struct dp_meter *meter;
+ struct dp_meter_band *band;
+ int err;
+
+ /* Validate attributes, count the bands. */
+ if (!a[OVS_METER_ATTR_BANDS])
+ return ERR_PTR(-EINVAL);
+
+ nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
+ if (++n_bands > DP_MAX_BANDS)
+ return ERR_PTR(-EINVAL);
+
+ /* Allocate and set up the meter before locking anything. */
+ meter = kzalloc(n_bands * sizeof(struct dp_meter_band) +
+ sizeof(*meter), GFP_KERNEL);
+ if (!meter)
+ return ERR_PTR(-ENOMEM);
+
+ meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
+ meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
+ meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
+ spin_lock_init(&meter->lock);
+ if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
+ meter->stats = *(struct ovs_flow_stats *)
+ nla_data(a[OVS_METER_ATTR_STATS]);
+ }
+ meter->n_bands = n_bands;
+
+ /* Set up meter bands. */
+ band = meter->bands;
+ nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
+ struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
+ u32 band_max_delta_t;
+
+ err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
+ nla_data(nla), nla_len(nla), band_policy,
+ NULL);
+ if (err)
+ goto exit_free_meter;
+
+ if (!attr[OVS_BAND_ATTR_TYPE] ||
+ !attr[OVS_BAND_ATTR_RATE] ||
+ !attr[OVS_BAND_ATTR_BURST]) {
+ err = -EINVAL;
+ goto exit_free_meter;
+ }
+
+ band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
+ band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
+ band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
+ /* Figure out max delta_t that is enough to fill any bucket.
+ * Keep max_delta_t size to the bucket units:
+ * pkts => 1/1000 packets, kilobits => bits.
+ */
+ band_max_delta_t = (band->burst_size + band->rate) * 1000;
+ /* Start with a full bucket. */
+ band->bucket = band_max_delta_t;
+ if (band_max_delta_t > meter->max_delta_t)
+ meter->max_delta_t = band_max_delta_t;
+ band++;
+ }
+
+ return meter;
+
+exit_free_meter:
+ kfree(meter);
+ return ERR_PTR(err);
+}
+
+static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct dp_meter *meter, *old_meter;
+ struct sk_buff *reply;
+ struct ovs_header *ovs_reply_header;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct datapath *dp;
+ int err;
+ u32 meter_id;
+ bool failed;
+
+ meter = dp_meter_create(a);
+ if (IS_ERR_OR_NULL(meter))
+ return PTR_ERR(meter);
+
+ reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
+ &ovs_reply_header);
+ if (IS_ERR(reply)) {
+ err = PTR_ERR(reply);
+ goto exit_free_meter;
+ }
+
+ ovs_lock();
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (!dp) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ if (!a[OVS_METER_ATTR_ID]) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+ /* Cannot fail after this. */
+ old_meter = lookup_meter(dp, meter_id);
+ detach_meter(old_meter);
+ attach_meter(dp, meter);
+ ovs_unlock();
+
+ /* Build response with the meter_id and stats from
+ * the old meter, if any.
+ */
+ failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
+ WARN_ON(failed);
+ if (old_meter) {
+ spin_lock_bh(&old_meter->lock);
+ if (old_meter->keep_stats) {
+ err = ovs_meter_cmd_reply_stats(reply, meter_id,
+ old_meter);
+ WARN_ON(err);
+ }
+ spin_unlock_bh(&old_meter->lock);
+ ovs_meter_free(old_meter);
+ }
+
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
+exit_unlock:
+ ovs_unlock();
+ nlmsg_free(reply);
+exit_free_meter:
+ kfree(meter);
+ return err;
+}
+
+static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ u32 meter_id;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_reply_header;
+ struct datapath *dp;
+ int err;
+ struct sk_buff *reply;
+ struct dp_meter *meter;
+
+ if (!a[OVS_METER_ATTR_ID])
+ return -EINVAL;
+
+ meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+ reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
+ &ovs_reply_header);
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+
+ ovs_lock();
+
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (!dp) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ /* Locate meter, copy stats. */
+ meter = lookup_meter(dp, meter_id);
+ if (!meter) {
+ err = -ENOENT;
+ goto exit_unlock;
+ }
+
+ spin_lock_bh(&meter->lock);
+ err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
+ spin_unlock_bh(&meter->lock);
+ if (err)
+ goto exit_unlock;
+
+ ovs_unlock();
+
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
+exit_unlock:
+ ovs_unlock();
+ nlmsg_free(reply);
+ return err;
+}
+
+static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ u32 meter_id;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_reply_header;
+ struct datapath *dp;
+ int err;
+ struct sk_buff *reply;
+ struct dp_meter *old_meter;
+
+ if (!a[OVS_METER_ATTR_ID])
+ return -EINVAL;
+ meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+ reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
+ &ovs_reply_header);
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+
+ ovs_lock();
+
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (!dp) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ old_meter = lookup_meter(dp, meter_id);
+ if (old_meter) {
+ spin_lock_bh(&old_meter->lock);
+ err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
+ WARN_ON(err);
+ spin_unlock_bh(&old_meter->lock);
+ detach_meter(old_meter);
+ }
+ ovs_unlock();
+ ovs_meter_free(old_meter);
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
+exit_unlock:
+ ovs_unlock();
+ nlmsg_free(reply);
+ return err;
+}
+
+/* Meter action execution.
+ *
+ * Return true 'meter_id' drop band is triggered. The 'skb' should be
+ * dropped by the caller'.
+ */
+bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key, u32 meter_id)
+{
+ struct dp_meter *meter;
+ struct dp_meter_band *band;
+ long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
+ long long int long_delta_ms;
+ u32 delta_ms;
+ u32 cost;
+ int i, band_exceeded_max = -1;
+ u32 band_exceeded_rate = 0;
+
+ meter = lookup_meter(dp, meter_id);
+ /* Do not drop the packet when there is no meter. */
+ if (!meter)
+ return false;
+
+ /* Lock the meter while using it. */
+ spin_lock(&meter->lock);
+
+ long_delta_ms = (now_ms - meter->used); /* ms */
+
+ /* Make sure delta_ms will not be too large, so that bucket will not
+ * wrap around below.
+ */
+ delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
+ ? meter->max_delta_t : (u32)long_delta_ms;
+
+ /* Update meter statistics.
+ */
+ meter->used = now_ms;
+ meter->stats.n_packets += 1;
+ meter->stats.n_bytes += skb->len;
+
+ /* Bucket rate is either in kilobits per second, or in packets per
+ * second. We maintain the bucket in the units of either bits or
+ * 1/1000th of a packet, correspondingly.
+ * Then, when rate is multiplied with milliseconds, we get the
+ * bucket units:
+ * msec * kbps = bits, and
+ * msec * packets/sec = 1/1000 packets.
+ *
+ * 'cost' is the number of bucket units in this packet.
+ */
+ cost = (meter->kbps) ? skb->len * 8 : 1000;
+
+ /* Update all bands and find the one hit with the highest rate. */
+ for (i = 0; i < meter->n_bands; ++i) {
+ long long int max_bucket_size;
+
+ band = &meter->bands[i];
+ max_bucket_size = (band->burst_size + band->rate) * 1000;
+
+ band->bucket += delta_ms * band->rate;
+ if (band->bucket > max_bucket_size)
+ band->bucket = max_bucket_size;
+
+ if (band->bucket >= cost) {
+ band->bucket -= cost;
+ } else if (band->rate > band_exceeded_rate) {
+ band_exceeded_rate = band->rate;
+ band_exceeded_max = i;
+ }
+ }
+
+ if (band_exceeded_max >= 0) {
+ /* Update band statistics. */
+ band = &meter->bands[band_exceeded_max];
+ band->stats.n_packets += 1;
+ band->stats.n_bytes += skb->len;
+
+ /* Drop band triggered, let the caller drop the 'skb'. */
+ if (band->type == OVS_METER_BAND_TYPE_DROP) {
+ spin_unlock(&meter->lock);
+ return true;
+ }
+ }
+
+ spin_unlock(&meter->lock);
+ return false;
+}
+
+static struct genl_ops dp_meter_genl_ops[] = {
+ { .cmd = OVS_METER_CMD_FEATURES,
+ .flags = 0, /* OK for unprivileged users. */
+ .policy = meter_policy,
+ .doit = ovs_meter_cmd_features
+ },
+ { .cmd = OVS_METER_CMD_SET,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
+ .policy = meter_policy,
+ .doit = ovs_meter_cmd_set,
+ },
+ { .cmd = OVS_METER_CMD_GET,
+ .flags = 0, /* OK for unprivileged users. */
+ .policy = meter_policy,
+ .doit = ovs_meter_cmd_get,
+ },
+ { .cmd = OVS_METER_CMD_DEL,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
+ .policy = meter_policy,
+ .doit = ovs_meter_cmd_del
+ },
+};
+
+static const struct genl_multicast_group ovs_meter_multicast_group = {
+ .name = OVS_METER_MCGROUP,
+};
+
+struct genl_family dp_meter_genl_family __ro_after_init = {
+ .hdrsize = sizeof(struct ovs_header),
+ .name = OVS_METER_FAMILY,
+ .version = OVS_METER_VERSION,
+ .maxattr = OVS_METER_ATTR_MAX,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = dp_meter_genl_ops,
+ .n_ops = ARRAY_SIZE(dp_meter_genl_ops),
+ .mcgrps = &ovs_meter_multicast_group,
+ .n_mcgrps = 1,
+ .module = THIS_MODULE,
+};
+
+int ovs_meters_init(struct datapath *dp)
+{
+ int i;
+
+ dp->meters = kmalloc_array(METER_HASH_BUCKETS,
+ sizeof(struct hlist_head), GFP_KERNEL);
+
+ if (!dp->meters)
+ return -ENOMEM;
+
+ for (i = 0; i < METER_HASH_BUCKETS; i++)
+ INIT_HLIST_HEAD(&dp->meters[i]);
+
+ return 0;
+}
+
+void ovs_meters_exit(struct datapath *dp)
+{
+ int i;
+
+ for (i = 0; i < METER_HASH_BUCKETS; i++) {
+ struct hlist_head *head = &dp->meters[i];
+ struct dp_meter *meter;
+ struct hlist_node *n;
+
+ hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
+ kfree(meter);
+ }
+
+ kfree(dp->meters);
+}
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
new file mode 100644
index 000000000000..964ace2650f8
--- /dev/null
+++ b/net/openvswitch/meter.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2017 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#ifndef METER_H
+#define METER_H 1
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/genetlink.h>
+#include <linux/skbuff.h>
+
+#include "flow.h"
+struct datapath;
+
+#define DP_MAX_BANDS 1
+
+struct dp_meter_band {
+ u32 type;
+ u32 rate;
+ u32 burst_size;
+ u32 bucket; /* 1/1000 packets, or in bits */
+ struct ovs_flow_stats stats;
+};
+
+struct dp_meter {
+ spinlock_t lock; /* Per meter lock */
+ struct rcu_head rcu;
+ struct hlist_node dp_hash_node; /*Element in datapath->meters
+ * hash table.
+ */
+ u32 id;
+ u16 kbps:1, keep_stats:1;
+ u16 n_bands;
+ u32 max_delta_t;
+ u64 used;
+ struct ovs_flow_stats stats;
+ struct dp_meter_band bands[];
+};
+
+extern struct genl_family dp_meter_genl_family;
+int ovs_meters_init(struct datapath *dp);
+void ovs_meters_exit(struct datapath *dp);
+bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key, u32 meter_id);
+
+#endif /* meter.h */
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index e458ece96d3d..77ab05e23001 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1120,7 +1120,7 @@ static int __init qrtr_proto_init(void)
return 0;
}
-module_init(qrtr_proto_init);
+postcore_initcall(qrtr_proto_init);
static void __exit qrtr_proto_fini(void)
{
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 9722bf839d9d..b4e421aa9727 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -410,14 +410,14 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
break;
}
- /* XXX when can this fail? */
- ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
- rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
+ rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
(long) ib_sg_dma_address(
ic->i_cm_id->device,
- &recv->r_frag->f_sg),
- ret);
+ &recv->r_frag->f_sg));
+
+ /* XXX when can this fail? */
+ ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
if (ret) {
rds_ib_conn_error(conn, "recv post on "
"%pI4 returned %d, disconnecting and "
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index ff4d69082376..4d33a50a8a6d 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -80,7 +80,6 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
spin_lock_bh(&idrinfo->lock);
idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);
spin_unlock_bh(&idrinfo->lock);
- put_net(idrinfo->net);
gen_kill_estimator(&p->tcfa_rate_est);
free_tcf(p);
}
@@ -339,7 +338,6 @@ err3:
p->idrinfo = idrinfo;
p->ops = ops;
INIT_LIST_HEAD(&p->list);
- get_net(idrinfo->net);
*a = p;
return 0;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 035ed268290b..5ef8ce8c83d4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -398,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
- return tc_action_net_init(tn, &act_bpf_ops, net);
+ return tc_action_net_init(tn, &act_bpf_ops);
}
static void __net_exit bpf_exit_net(struct net *net)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 34e52d01a5dd..10b7a8855a6c 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -206,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
- return tc_action_net_init(tn, &act_connmark_ops, net);
+ return tc_action_net_init(tn, &act_connmark_ops);
}
static void __net_exit connmark_exit_net(struct net *net)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 35171df2ebef..1c40caadcff9 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -626,7 +626,7 @@ static __net_init int csum_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
- return tc_action_net_init(tn, &act_csum_ops, net);
+ return tc_action_net_init(tn, &act_csum_ops);
}
static void __net_exit csum_exit_net(struct net *net)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index ef7f7f39d26d..e29a48ef7fc3 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -232,7 +232,7 @@ static __net_init int gact_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
- return tc_action_net_init(tn, &act_gact_ops, net);
+ return tc_action_net_init(tn, &act_gact_ops);
}
static void __net_exit gact_exit_net(struct net *net)
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index eca272533418..3007cb1310ea 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -855,7 +855,7 @@ static __net_init int ife_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
- return tc_action_net_init(tn, &act_ife_ops, net);
+ return tc_action_net_init(tn, &act_ife_ops);
}
static void __net_exit ife_exit_net(struct net *net)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index dbdf3b2470d5..d9e399a7e3d5 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
- return tc_action_net_init(tn, &act_ipt_ops, net);
+ return tc_action_net_init(tn, &act_ipt_ops);
}
static void __net_exit ipt_exit_net(struct net *net)
@@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
- return tc_action_net_init(tn, &act_xt_ops, net);
+ return tc_action_net_init(tn, &act_xt_ops);
}
static void __net_exit xt_exit_net(struct net *net)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 8df5775bbf22..8b3e59388480 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -340,7 +340,7 @@ static __net_init int mirred_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
- return tc_action_net_init(tn, &act_mirred_ops, net);
+ return tc_action_net_init(tn, &act_mirred_ops);
}
static void __net_exit mirred_exit_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 7eeaaf9217b6..c365d01b99c8 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -307,7 +307,7 @@ static __net_init int nat_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
- return tc_action_net_init(tn, &act_nat_ops, net);
+ return tc_action_net_init(tn, &act_nat_ops);
}
static void __net_exit nat_exit_net(struct net *net)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b3d82c334a5f..491fe5deb09e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -450,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
- return tc_action_net_init(tn, &act_pedit_ops, net);
+ return tc_action_net_init(tn, &act_pedit_ops);
}
static void __net_exit pedit_exit_net(struct net *net)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 9ec42b26e4b9..3bb2ebf9e9ae 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -331,7 +331,7 @@ static __net_init int police_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- return tc_action_net_init(tn, &act_police_ops, net);
+ return tc_action_net_init(tn, &act_police_ops);
}
static void __net_exit police_exit_net(struct net *net)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index e69a1e3a39bf..8b5abcd2f32f 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -240,7 +240,7 @@ static __net_init int sample_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
- return tc_action_net_init(tn, &act_sample_ops, net);
+ return tc_action_net_init(tn, &act_sample_ops);
}
static void __net_exit sample_exit_net(struct net *net)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index a8d0ea95f894..e7b57e5071a3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -201,7 +201,7 @@ static __net_init int simp_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
- return tc_action_net_init(tn, &act_simp_ops, net);
+ return tc_action_net_init(tn, &act_simp_ops);
}
static void __net_exit simp_exit_net(struct net *net)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index fbac62472e09..59949d61f20d 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -238,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
- return tc_action_net_init(tn, &act_skbedit_ops, net);
+ return tc_action_net_init(tn, &act_skbedit_ops);
}
static void __net_exit skbedit_exit_net(struct net *net)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 8e12d8897d2f..b642ad3d39dd 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -263,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
- return tc_action_net_init(tn, &act_skbmod_ops, net);
+ return tc_action_net_init(tn, &act_skbmod_ops);
}
static void __net_exit skbmod_exit_net(struct net *net)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index c33faa373cf2..30c96274c638 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -322,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
- return tc_action_net_init(tn, &act_tunnel_key_ops, net);
+ return tc_action_net_init(tn, &act_tunnel_key_ops);
}
static void __net_exit tunnel_key_exit_net(struct net *net)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 115fc33cc6d8..97f717a13ad5 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -26,14 +26,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_vlan *v = to_vlan(a);
+ struct tcf_vlan_params *p;
int action;
int err;
u16 tci;
- spin_lock(&v->tcf_lock);
tcf_lastuse_update(&v->tcf_tm);
- bstats_update(&v->tcf_bstats, skb);
- action = v->tcf_action;
+ bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
/* Ensure 'data' points at mac_header prior calling vlan manipulating
* functions.
@@ -41,15 +40,21 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
if (skb_at_tc_ingress(skb))
skb_push_rcsum(skb, skb->mac_len);
- switch (v->tcfv_action) {
+ rcu_read_lock();
+
+ action = READ_ONCE(v->tcf_action);
+
+ p = rcu_dereference(v->vlan_p);
+
+ switch (p->tcfv_action) {
case TCA_VLAN_ACT_POP:
err = skb_vlan_pop(skb);
if (err)
goto drop;
break;
case TCA_VLAN_ACT_PUSH:
- err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid |
- (v->tcfv_push_prio << VLAN_PRIO_SHIFT));
+ err = skb_vlan_push(skb, p->tcfv_push_proto, p->tcfv_push_vid |
+ (p->tcfv_push_prio << VLAN_PRIO_SHIFT));
if (err)
goto drop;
break;
@@ -68,14 +73,14 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
goto drop;
}
/* replace the vid */
- tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid;
+ tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid;
/* replace prio bits, if tcfv_push_prio specified */
- if (v->tcfv_push_prio) {
+ if (p->tcfv_push_prio) {
tci &= ~VLAN_PRIO_MASK;
- tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT;
+ tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT;
}
/* put updated tci as hwaccel tag */
- __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci);
+ __vlan_hwaccel_put_tag(skb, p->tcfv_push_proto, tci);
break;
default:
BUG();
@@ -85,12 +90,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
drop:
action = TC_ACT_SHOT;
- v->tcf_qstats.drops++;
+ qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
+
unlock:
+ rcu_read_unlock();
if (skb_at_tc_ingress(skb))
skb_pull_rcsum(skb, skb->mac_len);
- spin_unlock(&v->tcf_lock);
return action;
}
@@ -107,6 +113,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
+ struct tcf_vlan_params *p, *p_old;
struct tc_vlan *parm;
struct tcf_vlan *v;
int action;
@@ -172,7 +179,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a,
- &act_vlan_ops, bind, false);
+ &act_vlan_ops, bind, true);
if (ret)
return ret;
@@ -185,46 +192,67 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
v = to_vlan(*a);
- spin_lock_bh(&v->tcf_lock);
-
- v->tcfv_action = action;
- v->tcfv_push_vid = push_vid;
- v->tcfv_push_prio = push_prio;
- v->tcfv_push_proto = push_proto;
+ ASSERT_RTNL();
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ if (ovr)
+ tcf_idr_release(*a, bind);
+ return -ENOMEM;
+ }
v->tcf_action = parm->action;
- spin_unlock_bh(&v->tcf_lock);
+ p_old = rtnl_dereference(v->vlan_p);
+
+ p->tcfv_action = action;
+ p->tcfv_push_vid = push_vid;
+ p->tcfv_push_prio = push_prio;
+ p->tcfv_push_proto = push_proto;
+
+ rcu_assign_pointer(v->vlan_p, p);
+
+ if (p_old)
+ kfree_rcu(p_old, rcu);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
}
+static void tcf_vlan_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_vlan *v = to_vlan(a);
+ struct tcf_vlan_params *p;
+
+ p = rcu_dereference_protected(v->vlan_p, 1);
+ kfree_rcu(p, rcu);
+}
+
static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_vlan *v = to_vlan(a);
+ struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p);
struct tc_vlan opt = {
.index = v->tcf_index,
.refcnt = v->tcf_refcnt - ref,
.bindcnt = v->tcf_bindcnt - bind,
.action = v->tcf_action,
- .v_action = v->tcfv_action,
+ .v_action = p->tcfv_action,
};
struct tcf_t t;
if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- if ((v->tcfv_action == TCA_VLAN_ACT_PUSH ||
- v->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
- (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
+ if ((p->tcfv_action == TCA_VLAN_ACT_PUSH ||
+ p->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
+ (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) ||
nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
- v->tcfv_push_proto) ||
+ p->tcfv_push_proto) ||
(nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY,
- v->tcfv_push_prio))))
+ p->tcfv_push_prio))))
goto nla_put_failure;
tcf_tm_dump(&t, &v->tcf_tm);
@@ -260,6 +288,7 @@ static struct tc_action_ops act_vlan_ops = {
.act = tcf_vlan,
.dump = tcf_vlan_dump,
.init = tcf_vlan_init,
+ .cleanup = tcf_vlan_cleanup,
.walk = tcf_vlan_walker,
.lookup = tcf_vlan_search,
.size = sizeof(struct tcf_vlan),
@@ -269,7 +298,7 @@ static __net_init int vlan_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
- return tc_action_net_init(tn, &act_vlan_ops, net);
+ return tc_action_net_init(tn, &act_vlan_ops);
}
static void __net_exit vlan_exit_net(struct net *net)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 206e19f4fc01..ab255b421781 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1110,6 +1110,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
exts->actions[i++] = act;
exts->nr_actions = i;
}
+ exts->net = net;
}
#else
if ((exts->action && tb[exts->action]) ||
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 871351358c10..5f169ded347e 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -87,16 +87,21 @@ static int basic_init(struct tcf_proto *tp)
return 0;
}
+static void __basic_delete_filter(struct basic_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_em_tree_destroy(&f->ematches);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
static void basic_delete_filter_work(struct work_struct *work)
{
struct basic_filter *f = container_of(work, struct basic_filter, work);
rtnl_lock();
- tcf_exts_destroy(&f->exts);
- tcf_em_tree_destroy(&f->ematches);
+ __basic_delete_filter(f);
rtnl_unlock();
-
- kfree(f);
}
static void basic_delete_filter(struct rcu_head *head)
@@ -116,7 +121,10 @@ static void basic_destroy(struct tcf_proto *tp)
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
idr_remove_ext(&head->handle_idr, f->handle);
- call_rcu(&f->rcu, basic_delete_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, basic_delete_filter);
+ else
+ __basic_delete_filter(f);
}
idr_destroy(&head->handle_idr);
kfree_rcu(head, rcu);
@@ -130,6 +138,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
idr_remove_ext(&head->handle_idr, f->handle);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, basic_delete_filter);
*last = list_empty(&head->flist);
return 0;
@@ -225,6 +234,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
list_replace_rcu(&fold->link, &fnew->link);
tcf_unbind_filter(tp, &fold->res);
+ tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, basic_delete_filter);
} else {
list_add_rcu(&fnew->link, &head->flist);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index dc9bd9a0070b..fb680dafac5a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -261,6 +261,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
{
tcf_exts_destroy(&prog->exts);
+ tcf_exts_put_net(&prog->exts);
if (cls_bpf_is_ebpf(prog))
bpf_prog_put(prog->filter);
@@ -297,7 +298,10 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
- call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+ if (tcf_exts_get_net(&prog->exts))
+ call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+ else
+ __cls_bpf_delete_prog(prog);
}
static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last)
@@ -526,6 +530,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
idr_replace_ext(&head->handle_idr, prog, handle);
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
+ tcf_exts_get_net(&oldprog->exts);
call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
} else {
list_add_rcu(&prog->link, &head->plist);
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index a97e069bee89..309d5899265f 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -60,15 +60,21 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
[TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
};
+static void __cls_cgroup_destroy(struct cls_cgroup_head *head)
+{
+ tcf_exts_destroy(&head->exts);
+ tcf_em_tree_destroy(&head->ematches);
+ tcf_exts_put_net(&head->exts);
+ kfree(head);
+}
+
static void cls_cgroup_destroy_work(struct work_struct *work)
{
struct cls_cgroup_head *head = container_of(work,
struct cls_cgroup_head,
work);
rtnl_lock();
- tcf_exts_destroy(&head->exts);
- tcf_em_tree_destroy(&head->ematches);
- kfree(head);
+ __cls_cgroup_destroy(head);
rtnl_unlock();
}
@@ -124,8 +130,10 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
goto errout;
rcu_assign_pointer(tp->root, new);
- if (head)
+ if (head) {
+ tcf_exts_get_net(&head->exts);
call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+ }
return 0;
errout:
tcf_exts_destroy(&new->exts);
@@ -138,8 +146,12 @@ static void cls_cgroup_destroy(struct tcf_proto *tp)
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
/* Head can still be NULL due to cls_cgroup_init(). */
- if (head)
- call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+ if (head) {
+ if (tcf_exts_get_net(&head->exts))
+ call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+ else
+ __cls_cgroup_destroy(head);
+ }
}
static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 9c08fcdaf41c..25c2a888e1f0 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -372,15 +372,21 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
[TCA_FLOW_PERTURB] = { .type = NLA_U32 },
};
-static void flow_destroy_filter_work(struct work_struct *work)
+static void __flow_destroy_filter(struct flow_filter *f)
{
- struct flow_filter *f = container_of(work, struct flow_filter, work);
-
- rtnl_lock();
del_timer_sync(&f->perturb_timer);
tcf_exts_destroy(&f->exts);
tcf_em_tree_destroy(&f->ematches);
+ tcf_exts_put_net(&f->exts);
kfree(f);
+}
+
+static void flow_destroy_filter_work(struct work_struct *work)
+{
+ struct flow_filter *f = container_of(work, struct flow_filter, work);
+
+ rtnl_lock();
+ __flow_destroy_filter(f);
rtnl_unlock();
}
@@ -554,8 +560,10 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
*arg = fnew;
- if (fold)
+ if (fold) {
+ tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, flow_destroy_filter);
+ }
return 0;
err2:
@@ -572,6 +580,7 @@ static int flow_delete(struct tcf_proto *tp, void *arg, bool *last)
struct flow_filter *f = arg;
list_del_rcu(&f->list);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, flow_destroy_filter);
*last = list_empty(&head->filters);
return 0;
@@ -596,7 +605,10 @@ static void flow_destroy(struct tcf_proto *tp)
list_for_each_entry_safe(f, next, &head->filters, list) {
list_del_rcu(&f->list);
- call_rcu(&f->rcu, flow_destroy_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, flow_destroy_filter);
+ else
+ __flow_destroy_filter(f);
}
kfree_rcu(head, rcu);
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c99fa9e5be46..543a3e875d05 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -193,13 +193,19 @@ static int fl_init(struct tcf_proto *tp)
return 0;
}
+static void __fl_destroy_filter(struct cls_fl_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
static void fl_destroy_filter_work(struct work_struct *work)
{
struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work);
rtnl_lock();
- tcf_exts_destroy(&f->exts);
- kfree(f);
+ __fl_destroy_filter(f);
rtnl_unlock();
}
@@ -282,7 +288,10 @@ static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
if (!tc_skip_hw(f->flags))
fl_hw_destroy_filter(tp, f);
tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, fl_destroy_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, fl_destroy_filter);
+ else
+ __fl_destroy_filter(f);
}
static void fl_destroy_sleepable(struct work_struct *work)
@@ -952,6 +961,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
list_replace_rcu(&fold->list, &fnew->list);
tcf_unbind_filter(tp, &fold->res);
+ tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, fl_destroy_filter);
} else {
list_add_tail_rcu(&fnew->list, &head->filters);
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 5908f56f76da..20f0de1a960a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -125,13 +125,19 @@ static int fw_init(struct tcf_proto *tp)
return 0;
}
+static void __fw_delete_filter(struct fw_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
static void fw_delete_filter_work(struct work_struct *work)
{
struct fw_filter *f = container_of(work, struct fw_filter, work);
rtnl_lock();
- tcf_exts_destroy(&f->exts);
- kfree(f);
+ __fw_delete_filter(f);
rtnl_unlock();
}
@@ -157,7 +163,10 @@ static void fw_destroy(struct tcf_proto *tp)
RCU_INIT_POINTER(head->ht[h],
rtnl_dereference(f->next));
tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, fw_delete_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, fw_delete_filter);
+ else
+ __fw_delete_filter(f);
}
}
kfree_rcu(head, rcu);
@@ -182,6 +191,7 @@ static int fw_delete(struct tcf_proto *tp, void *arg, bool *last)
if (pfp == f) {
RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
tcf_unbind_filter(tp, &f->res);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, fw_delete_filter);
ret = 0;
break;
@@ -302,6 +312,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next));
rcu_assign_pointer(*fp, fnew);
tcf_unbind_filter(tp, &f->res);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, fw_delete_filter);
*arg = fnew;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 95dc997873e8..66d4e0099158 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -44,13 +44,19 @@ static int mall_init(struct tcf_proto *tp)
return 0;
}
+static void __mall_destroy(struct cls_mall_head *head)
+{
+ tcf_exts_destroy(&head->exts);
+ tcf_exts_put_net(&head->exts);
+ kfree(head);
+}
+
static void mall_destroy_work(struct work_struct *work)
{
struct cls_mall_head *head = container_of(work, struct cls_mall_head,
work);
rtnl_lock();
- tcf_exts_destroy(&head->exts);
- kfree(head);
+ __mall_destroy(head);
rtnl_unlock();
}
@@ -116,7 +122,10 @@ static void mall_destroy(struct tcf_proto *tp)
if (!tc_skip_hw(head->flags))
mall_destroy_hw_filter(tp, head, (unsigned long) head);
- call_rcu(&head->rcu, mall_destroy_rcu);
+ if (tcf_exts_get_net(&head->exts))
+ call_rcu(&head->rcu, mall_destroy_rcu);
+ else
+ __mall_destroy(head);
}
static void *mall_get(struct tcf_proto *tp, u32 handle)
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 4b14ccd8b8f2..ac9a5b8825b9 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -257,13 +257,19 @@ static int route4_init(struct tcf_proto *tp)
return 0;
}
+static void __route4_delete_filter(struct route4_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
static void route4_delete_filter_work(struct work_struct *work)
{
struct route4_filter *f = container_of(work, struct route4_filter, work);
rtnl_lock();
- tcf_exts_destroy(&f->exts);
- kfree(f);
+ __route4_delete_filter(f);
rtnl_unlock();
}
@@ -297,7 +303,10 @@ static void route4_destroy(struct tcf_proto *tp)
next = rtnl_dereference(f->next);
RCU_INIT_POINTER(b->ht[h2], next);
tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, route4_delete_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, route4_delete_filter);
+ else
+ __route4_delete_filter(f);
}
}
RCU_INIT_POINTER(head->table[h1], NULL);
@@ -338,6 +347,7 @@ static int route4_delete(struct tcf_proto *tp, void *arg, bool *last)
/* Delete it */
tcf_unbind_filter(tp, &f->res);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, route4_delete_filter);
/* Strip RTNL protected tree */
@@ -541,6 +551,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
*arg = f;
if (fold) {
tcf_unbind_filter(tp, &fold->res);
+ tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, route4_delete_filter);
}
return 0;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index bdbc541787f8..cf325625c99d 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -285,13 +285,19 @@ static int rsvp_init(struct tcf_proto *tp)
return -ENOBUFS;
}
+static void __rsvp_delete_filter(struct rsvp_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
static void rsvp_delete_filter_work(struct work_struct *work)
{
struct rsvp_filter *f = container_of(work, struct rsvp_filter, work);
rtnl_lock();
- tcf_exts_destroy(&f->exts);
- kfree(f);
+ __rsvp_delete_filter(f);
rtnl_unlock();
}
@@ -310,7 +316,10 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
* grace period, since converted-to-rcu actions are relying on that
* in cleanup() callback
*/
- call_rcu(&f->rcu, rsvp_delete_filter_rcu);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, rsvp_delete_filter_rcu);
+ else
+ __rsvp_delete_filter(f);
}
static void rsvp_destroy(struct tcf_proto *tp)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index d6abfa6757f2..67467ae24c97 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -142,13 +142,19 @@ static int tcindex_init(struct tcf_proto *tp)
return 0;
}
+static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
+{
+ tcf_exts_destroy(&r->exts);
+ tcf_exts_put_net(&r->exts);
+}
+
static void tcindex_destroy_rexts_work(struct work_struct *work)
{
struct tcindex_filter_result *r;
r = container_of(work, struct tcindex_filter_result, work);
rtnl_lock();
- tcf_exts_destroy(&r->exts);
+ __tcindex_destroy_rexts(r);
rtnl_unlock();
}
@@ -161,14 +167,20 @@ static void tcindex_destroy_rexts(struct rcu_head *head)
tcf_queue_work(&r->work);
}
+static void __tcindex_destroy_fexts(struct tcindex_filter *f)
+{
+ tcf_exts_destroy(&f->result.exts);
+ tcf_exts_put_net(&f->result.exts);
+ kfree(f);
+}
+
static void tcindex_destroy_fexts_work(struct work_struct *work)
{
struct tcindex_filter *f = container_of(work, struct tcindex_filter,
work);
rtnl_lock();
- tcf_exts_destroy(&f->result.exts);
- kfree(f);
+ __tcindex_destroy_fexts(f);
rtnl_unlock();
}
@@ -213,10 +225,17 @@ found:
* grace period, since converted-to-rcu actions are relying on that
* in cleanup() callback
*/
- if (f)
- call_rcu(&f->rcu, tcindex_destroy_fexts);
- else
- call_rcu(&r->rcu, tcindex_destroy_rexts);
+ if (f) {
+ if (tcf_exts_get_net(&f->result.exts))
+ call_rcu(&f->rcu, tcindex_destroy_fexts);
+ else
+ __tcindex_destroy_fexts(f);
+ } else {
+ if (tcf_exts_get_net(&r->exts))
+ call_rcu(&r->rcu, tcindex_destroy_rexts);
+ else
+ __tcindex_destroy_rexts(r);
+ }
*last = false;
return 0;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 2737b71854c9..ac152b4f4247 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -399,6 +399,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
bool free_pf)
{
tcf_exts_destroy(&n->exts);
+ tcf_exts_put_net(&n->exts);
if (n->ht_down)
n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
@@ -476,6 +477,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
RCU_INIT_POINTER(*kp, key->next);
tcf_unbind_filter(tp, &key->res);
+ tcf_exts_get_net(&key->exts);
call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
return 0;
}
@@ -590,7 +592,10 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
tcf_unbind_filter(tp, &n->res);
u32_remove_hw_knode(tp, n->handle);
idr_remove_ext(&ht->handle_idr, n->handle);
- call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
+ if (tcf_exts_get_net(&n->exts))
+ call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
+ else
+ u32_destroy_key(n->tp, n, true);
}
}
}
@@ -949,6 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
u32_replace_knode(tp, tp_c, new);
tcf_unbind_filter(tp, &n->res);
+ tcf_exts_get_net(&n->exts);
call_rcu(&n->rcu, u32_delete_key_rcu);
return 0;
}
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index bdb533b7fb8c..7a72980c1509 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -212,7 +212,7 @@ static void cbs_disable_offload(struct net_device *dev,
cbs.queue = q->queue;
cbs.enable = 0;
- err = ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs);
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
if (err < 0)
pr_warn("Couldn't disable CBS offload for queue %d\n",
cbs.queue);
@@ -236,7 +236,7 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
cbs.idleslope = opt->idleslope;
cbs.sendslope = opt->sendslope;
- err = ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs);
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
if (err < 0)
return err;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 4d5ed45123f0..b85885a9d8a1 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -50,7 +50,8 @@ static void mqprio_destroy(struct Qdisc *sch)
switch (priv->mode) {
case TC_MQPRIO_MODE_DCB:
case TC_MQPRIO_MODE_CHANNEL:
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO,
+ dev->netdev_ops->ndo_setup_tc(dev,
+ TC_SETUP_QDISC_MQPRIO,
&mqprio);
break;
default:
@@ -265,7 +266,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
return -EINVAL;
}
err = dev->netdev_ops->ndo_setup_tc(dev,
- TC_SETUP_MQPRIO,
+ TC_SETUP_QDISC_MQPRIO,
&mqprio);
if (err)
return err;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index db0228a65e8c..b686e755fda9 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -77,8 +77,8 @@ struct netem_sched_data {
struct qdisc_watchdog watchdog;
- psched_tdiff_t latency;
- psched_tdiff_t jitter;
+ s64 latency;
+ s64 jitter;
u32 loss;
u32 ecn;
@@ -135,6 +135,13 @@ struct netem_sched_data {
u32 a5; /* p23 used only in 4-states */
} clg;
+ struct tc_netem_slot slot_config;
+ struct slotstate {
+ u64 slot_next;
+ s32 packets_left;
+ s32 bytes_left;
+ } slot;
+
};
/* Time stamp put into socket buffer control block
@@ -145,7 +152,7 @@ struct netem_sched_data {
* we save skb->tstamp value in skb->cb[] before destroying it.
*/
struct netem_skb_cb {
- psched_time_t time_to_send;
+ u64 time_to_send;
};
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
@@ -305,11 +312,11 @@ static bool loss_event(struct netem_sched_data *q)
* std deviation sigma. Uses table lookup to approximate the desired
* distribution, and a uniformly-distributed pseudo-random source.
*/
-static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
- struct crndstate *state,
- const struct disttable *dist)
+static s64 tabledist(s64 mu, s64 sigma,
+ struct crndstate *state,
+ const struct disttable *dist)
{
- psched_tdiff_t x;
+ s64 x;
long t;
u32 rnd;
@@ -332,10 +339,10 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
-static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
+static u64 packet_len_2_sched_time(unsigned int len,
+ struct netem_sched_data *q)
{
- u64 ticks;
-
+ u64 offset;
len += q->packet_overhead;
if (q->cell_size) {
@@ -345,11 +352,9 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
cells++;
len = cells * (q->cell_size + q->cell_overhead);
}
-
- ticks = (u64)len * NSEC_PER_SEC;
-
- do_div(ticks, q->rate);
- return PSCHED_NS2TICKS(ticks);
+ offset = (u64)len * NSEC_PER_SEC;
+ do_div(offset, q->rate);
+ return offset;
}
static void tfifo_reset(struct Qdisc *sch)
@@ -369,7 +374,7 @@ static void tfifo_reset(struct Qdisc *sch)
static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
- psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
+ u64 tnext = netem_skb_cb(nskb)->time_to_send;
struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
while (*p) {
@@ -515,13 +520,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (q->gap == 0 || /* not doing reordering */
q->counter < q->gap - 1 || /* inside last reordering gap */
q->reorder < get_crandom(&q->reorder_cor)) {
- psched_time_t now;
- psched_tdiff_t delay;
+ u64 now;
+ s64 delay;
delay = tabledist(q->latency, q->jitter,
&q->delay_cor, q->delay_dist);
- now = psched_get_time();
+ now = ktime_get_ns();
if (q->rate) {
struct netem_skb_cb *last = NULL;
@@ -547,7 +552,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
* from delay.
*/
delay -= last->time_to_send - now;
- delay = max_t(psched_tdiff_t, 0, delay);
+ delay = max_t(s64, 0, delay);
now = last->time_to_send;
}
@@ -562,7 +567,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
* Do re-ordering by putting one out of N packets at the front
* of the queue.
*/
- cb->time_to_send = psched_get_time();
+ cb->time_to_send = ktime_get_ns();
q->counter = 0;
netem_enqueue_skb_head(&sch->q, skb);
@@ -593,6 +598,20 @@ finish_segs:
return NET_XMIT_SUCCESS;
}
+/* Delay the next round with a new future slot with a
+ * correct number of bytes and packets.
+ */
+
+static void get_slot_next(struct netem_sched_data *q, u64 now)
+{
+ q->slot.slot_next = now + q->slot_config.min_delay +
+ (prandom_u32() *
+ (q->slot_config.max_delay -
+ q->slot_config.min_delay) >> 32);
+ q->slot.packets_left = q->slot_config.max_packets;
+ q->slot.bytes_left = q->slot_config.max_bytes;
+}
+
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -609,15 +628,18 @@ deliver:
}
p = rb_first(&q->t_root);
if (p) {
- psched_time_t time_to_send;
+ u64 time_to_send;
+ u64 now = ktime_get_ns();
skb = rb_to_skb(p);
/* if more time remaining? */
time_to_send = netem_skb_cb(skb)->time_to_send;
- if (time_to_send <= psched_get_time()) {
- rb_erase(p, &q->t_root);
+ if (q->slot.slot_next && q->slot.slot_next < time_to_send)
+ get_slot_next(q, now);
+ if (time_to_send <= now && q->slot.slot_next <= now) {
+ rb_erase(p, &q->t_root);
sch->q.qlen--;
qdisc_qstats_backlog_dec(sch, skb);
skb->next = NULL;
@@ -636,6 +658,14 @@ deliver:
skb->tstamp = 0;
#endif
+ if (q->slot.slot_next) {
+ q->slot.packets_left--;
+ q->slot.bytes_left -= qdisc_pkt_len(skb);
+ if (q->slot.packets_left <= 0 ||
+ q->slot.bytes_left <= 0)
+ get_slot_next(q, now);
+ }
+
if (q->qdisc) {
unsigned int pkt_len = qdisc_pkt_len(skb);
struct sk_buff *to_free = NULL;
@@ -659,7 +689,10 @@ deliver:
if (skb)
goto deliver;
}
- qdisc_watchdog_schedule(&q->watchdog, time_to_send);
+
+ qdisc_watchdog_schedule_ns(&q->watchdog,
+ max(time_to_send,
+ q->slot.slot_next));
}
if (q->qdisc) {
@@ -690,6 +723,7 @@ static void dist_free(struct disttable *d)
* Distribution data is a variable size payload containing
* signed 16 bit values.
*/
+
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -720,6 +754,23 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
return 0;
}
+static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
+{
+ const struct tc_netem_slot *c = nla_data(attr);
+
+ q->slot_config = *c;
+ if (q->slot_config.max_packets == 0)
+ q->slot_config.max_packets = INT_MAX;
+ if (q->slot_config.max_bytes == 0)
+ q->slot_config.max_bytes = INT_MAX;
+ q->slot.packets_left = q->slot_config.max_packets;
+ q->slot.bytes_left = q->slot_config.max_bytes;
+ if (q->slot_config.min_delay | q->slot_config.max_delay)
+ q->slot.slot_next = ktime_get_ns();
+ else
+ q->slot.slot_next = 0;
+}
+
static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
const struct tc_netem_corr *c = nla_data(attr);
@@ -821,6 +872,9 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_LOSS] = { .type = NLA_NESTED },
[TCA_NETEM_ECN] = { .type = NLA_U32 },
[TCA_NETEM_RATE64] = { .type = NLA_U64 },
+ [TCA_NETEM_LATENCY64] = { .type = NLA_S64 },
+ [TCA_NETEM_JITTER64] = { .type = NLA_S64 },
+ [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) },
};
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -888,8 +942,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
sch->limit = qopt->limit;
- q->latency = qopt->latency;
- q->jitter = qopt->jitter;
+ q->latency = PSCHED_TICKS2NS(qopt->latency);
+ q->jitter = PSCHED_TICKS2NS(qopt->jitter);
q->limit = qopt->limit;
q->gap = qopt->gap;
q->counter = 0;
@@ -918,9 +972,18 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
q->rate = max_t(u64, q->rate,
nla_get_u64(tb[TCA_NETEM_RATE64]));
+ if (tb[TCA_NETEM_LATENCY64])
+ q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
+
+ if (tb[TCA_NETEM_JITTER64])
+ q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
+
if (tb[TCA_NETEM_ECN])
q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
+ if (tb[TCA_NETEM_SLOT])
+ get_slot(q, tb[TCA_NETEM_SLOT]);
+
return ret;
}
@@ -1010,9 +1073,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tc_netem_reorder reorder;
struct tc_netem_corrupt corrupt;
struct tc_netem_rate rate;
+ struct tc_netem_slot slot;
- qopt.latency = q->latency;
- qopt.jitter = q->jitter;
+ qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
+ UINT_MAX);
+ qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
+ UINT_MAX);
qopt.limit = q->limit;
qopt.loss = q->loss;
qopt.gap = q->gap;
@@ -1020,6 +1086,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
goto nla_put_failure;
+ if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
+ goto nla_put_failure;
+
+ if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
+ goto nla_put_failure;
+
cor.delay_corr = q->delay_cor.rho;
cor.loss_corr = q->loss_cor.rho;
cor.dup_corr = q->dup_cor.rho;
@@ -1056,6 +1128,16 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
if (dump_loss_model(q, skb) != 0)
goto nla_put_failure;
+ if (q->slot_config.min_delay | q->slot_config.max_delay) {
+ slot = q->slot_config;
+ if (slot.max_packets == INT_MAX)
+ slot.max_packets = 0;
+ if (slot.max_bytes == INT_MAX)
+ slot.max_bytes = 0;
+ if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
+ goto nla_put_failure;
+ }
+
return nla_nest_end(skb, nla);
nla_put_failure:
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index fdfdb56aaae2..7f8ea9e297c3 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
@@ -148,11 +149,37 @@ static void red_reset(struct Qdisc *sch)
red_restart(&q->vars);
}
+static int red_offload(struct Qdisc *sch, bool enable)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_red_qopt_offload opt = {
+ .handle = sch->handle,
+ .parent = sch->parent,
+ };
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
+ if (enable) {
+ opt.command = TC_RED_REPLACE;
+ opt.set.min = q->parms.qth_min >> q->parms.Wlog;
+ opt.set.max = q->parms.qth_max >> q->parms.Wlog;
+ opt.set.probability = q->parms.max_P;
+ opt.set.is_ecn = red_use_ecn(q);
+ } else {
+ opt.command = TC_RED_DESTROY;
+ }
+
+ return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+}
+
static void red_destroy(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
del_timer_sync(&q->adapt_timer);
+ red_offload(sch, false);
qdisc_destroy(q->qdisc);
}
@@ -219,6 +246,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
red_start_of_idle_period(&q->vars);
sch_tree_unlock(sch);
+ red_offload(sch, true);
return 0;
}
@@ -244,6 +272,35 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
return red_change(sch, opt);
}
+static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_red_qopt_offload hw_stats = {
+ .command = TC_RED_STATS,
+ .handle = sch->handle,
+ .parent = sch->parent,
+ {
+ .stats.bstats = &sch->bstats,
+ .stats.qstats = &sch->qstats,
+ },
+ };
+ int err;
+
+ opt->flags &= ~TC_RED_OFFLOADED;
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return 0;
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+ &hw_stats);
+ if (err == -EOPNOTSUPP)
+ return 0;
+
+ if (!err)
+ opt->flags |= TC_RED_OFFLOADED;
+
+ return err;
+}
+
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct red_sched_data *q = qdisc_priv(sch);
@@ -257,8 +314,13 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
.Plog = q->parms.Plog,
.Scell_log = q->parms.Scell_log,
};
+ int err;
sch->qstats.backlog = q->qdisc->qstats.backlog;
+ err = red_dump_offload(sch, &opt);
+ if (err)
+ goto nla_put_failure;
+
opts = nla_nest_start(skb, TCA_OPTIONS);
if (opts == NULL)
goto nla_put_failure;
@@ -275,6 +337,7 @@ nla_put_failure:
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct red_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
struct tc_red_xstats st = {
.early = q->stats.prob_drop + q->stats.forced_drop,
.pdrop = q->stats.pdrop,
@@ -282,6 +345,26 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.marked = q->stats.prob_mark + q->stats.forced_mark,
};
+ if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) {
+ struct red_stats hw_stats = {0};
+ struct tc_red_qopt_offload hw_stats_request = {
+ .command = TC_RED_XSTATS,
+ .handle = sch->handle,
+ .parent = sch->parent,
+ {
+ .xstats = &hw_stats,
+ },
+ };
+ if (!dev->netdev_ops->ndo_setup_tc(dev,
+ TC_SETUP_QDISC_RED,
+ &hw_stats_request)) {
+ st.early += hw_stats.prob_drop + hw_stats.forced_drop;
+ st.pdrop += hw_stats.pdrop;
+ st.other += hw_stats.other;
+ st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
+ }
+ }
+
return gnet_stats_copy_app(d, &st, sizeof(st));
}
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 0531b41d1f2d..74b9d916a58b 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -345,6 +345,8 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj)
return sizeof(struct switchdev_obj_port_vlan);
case SWITCHDEV_OBJ_ID_PORT_MDB:
return sizeof(struct switchdev_obj_port_mdb);
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ return sizeof(struct switchdev_obj_port_mdb);
default:
BUG();
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 870b9b8f877a..6bce0b1117bd 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -239,7 +239,8 @@ static int link_is_up(struct tipc_link *l)
static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq);
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
- u16 rcvgap, int tolerance, int priority,
+ bool probe_reply, u16 rcvgap,
+ int tolerance, int priority,
struct sk_buff_head *xmitq);
static void link_print(struct tipc_link *l, const char *str);
static int tipc_link_build_nack_msg(struct tipc_link *l,
@@ -773,7 +774,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
}
if (state || probe || setup)
- tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq);
return rc;
}
@@ -1174,7 +1175,7 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
/* Unicast ACK */
l->rcv_unacked = 0;
l->stats.sent_acks++;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
return 0;
}
@@ -1188,7 +1189,7 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
if (l->state == LINK_ESTABLISHING)
mtyp = ACTIVATE_MSG;
- tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq);
/* Inform peer that this endpoint is going down if applicable */
skb = skb_peek_tail(xmitq);
@@ -1215,7 +1216,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
}
if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
return 0;
}
@@ -1289,7 +1290,8 @@ drop:
}
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
- u16 rcvgap, int tolerance, int priority,
+ bool probe_reply, u16 rcvgap,
+ int tolerance, int priority,
struct sk_buff_head *xmitq)
{
struct tipc_link *bcl = l->bc_rcvlink;
@@ -1337,6 +1339,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
msg_set_seq_gap(hdr, rcvgap);
msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
msg_set_probe(hdr, probe);
+ msg_set_is_keepalive(hdr, probe || probe_reply);
tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
msg_set_size(hdr, INT_H_SIZE + dlen);
skb_trim(skb, INT_H_SIZE + dlen);
@@ -1442,6 +1445,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
u16 rcv_nxt = l->rcv_nxt;
u16 dlen = msg_data_sz(hdr);
int mtyp = msg_type(hdr);
+ bool reply = msg_probe(hdr);
void *data;
char *if_name;
int rc = 0;
@@ -1528,9 +1532,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
/* Send NACK if peer has sent pkts we haven't received yet */
if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
rcvgap = peers_snd_nxt - l->rcv_nxt;
- if (rcvgap || (msg_probe(hdr)))
- tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap,
- 0, 0, xmitq);
+ if (rcvgap || reply)
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
+ rcvgap, 0, 0, xmitq);
tipc_link_release_pkts(l, ack);
/* If NACK, retransmit will now start at right position */
@@ -2122,14 +2126,14 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
struct sk_buff_head *xmitq)
{
l->tolerance = tol;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
}
void tipc_link_set_prio(struct tipc_link *l, u32 prio,
struct sk_buff_head *xmitq)
{
l->priority = prio;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq);
}
void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index cedf811317fb..bf8f57ccc70c 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -226,6 +226,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d)
msg_set_bits(m, 0, 19, 1, d);
}
+static inline int msg_is_keepalive(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 19, 1);
+}
+
+static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d)
+{
+ msg_set_bits(m, 0, 19, 1, d);
+}
+
static inline int msg_src_droppable(struct tipc_msg *m)
{
return msg_bits(m, 0, 18, 1);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 82d20ee34581..347ab31574d5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -266,8 +266,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto lock;
}
- daddr = (xfrm_address_t *)(skb_network_header(skb) +
- XFRM_SPI_SKB_CB(skb)->daddroff);
family = XFRM_SPI_SKB_CB(skb)->family;
/* if tunnel is present override skb->mark value with tunnel i_key */
@@ -294,6 +292,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
+ daddr = (xfrm_address_t *)(skb_network_header(skb) +
+ XFRM_SPI_SKB_CB(skb)->daddroff);
do {
if (skb->sp->len == XFRM_MAX_DEPTH) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f02b1743b239..2f57722f5d03 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1360,36 +1360,29 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
struct net *net = xp_net(policy);
int nx;
int i, error;
- xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
- xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
xfrm_address_t tmp;
for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
struct xfrm_state *x;
- xfrm_address_t *remote = daddr;
- xfrm_address_t *local = saddr;
+ xfrm_address_t *local;
+ xfrm_address_t *remote;
struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
- if (tmpl->mode == XFRM_MODE_TUNNEL ||
- tmpl->mode == XFRM_MODE_BEET) {
- remote = &tmpl->id.daddr;
- local = &tmpl->saddr;
- if (xfrm_addr_any(local, tmpl->encap_family)) {
- error = xfrm_get_saddr(net, fl->flowi_oif,
- &tmp, remote,
- tmpl->encap_family, 0);
- if (error)
- goto fail;
- local = &tmp;
- }
+ remote = &tmpl->id.daddr;
+ local = &tmpl->saddr;
+ if (xfrm_addr_any(local, tmpl->encap_family)) {
+ error = xfrm_get_saddr(net, fl->flowi_oif,
+ &tmp, remote,
+ tmpl->encap_family, 0);
+ if (error)
+ goto fail;
+ local = &tmp;
}
x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
if (x && x->km.state == XFRM_STATE_VALID) {
xfrm[nx++] = x;
- daddr = remote;
- saddr = local;
continue;
}
if (x) {
@@ -1786,19 +1779,23 @@ void xfrm_policy_cache_flush(void)
put_online_cpus();
}
-static bool xfrm_pol_dead(struct xfrm_dst *xdst)
+static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
+ struct xfrm_state * const xfrm[],
+ int num)
{
- unsigned int num_pols = xdst->num_pols;
- unsigned int pol_dead = 0, i;
+ const struct dst_entry *dst = &xdst->u.dst;
+ int i;
- for (i = 0; i < num_pols; i++)
- pol_dead |= xdst->pols[i]->walk.dead;
+ if (xdst->num_xfrms != num)
+ return false;
- /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
- if (pol_dead)
- xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
+ for (i = 0; i < num; i++) {
+ if (!dst || dst->xfrm != xfrm[i])
+ return false;
+ dst = dst->child;
+ }
- return pol_dead;
+ return xfrm_bundle_ok(xdst);
}
static struct xfrm_dst *
@@ -1812,26 +1809,28 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
struct dst_entry *dst;
int err;
+ /* Try to instantiate a bundle */
+ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
+ if (err <= 0) {
+ if (err != 0 && err != -EAGAIN)
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+ return ERR_PTR(err);
+ }
+
xdst = this_cpu_read(xfrm_last_dst);
if (xdst &&
xdst->u.dst.dev == dst_orig->dev &&
xdst->num_pols == num_pols &&
- !xfrm_pol_dead(xdst) &&
memcmp(xdst->pols, pols,
sizeof(struct xfrm_policy *) * num_pols) == 0 &&
- xfrm_bundle_ok(xdst)) {
+ xfrm_xdst_can_reuse(xdst, xfrm, err)) {
dst_hold(&xdst->u.dst);
+ while (err > 0)
+ xfrm_state_put(xfrm[--err]);
return xdst;
}
old = xdst;
- /* Try to instantiate a bundle */
- err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
- if (err <= 0) {
- if (err != 0 && err != -EAGAIN)
- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
- return ERR_PTR(err);
- }
dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
if (IS_ERR(dst)) {