summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/filter.c28
-rw-r--r--net/core/flow_dissector.c31
-rw-r--r--net/core/skbuff.c28
-rw-r--r--net/core/sysctl_net_core.c5
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/udp.c12
-rw-r--r--net/sched/cls_flower.c129
8 files changed, 233 insertions, 6 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index bffb5253e778..1d33ce03365f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3447,6 +3447,8 @@ EXPORT_SYMBOL(rps_cpu_mask);
struct static_key rps_needed __read_mostly;
EXPORT_SYMBOL(rps_needed);
+struct static_key rfs_needed __read_mostly;
+EXPORT_SYMBOL(rfs_needed);
static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
diff --git a/net/core/filter.c b/net/core/filter.c
index b751202e12f8..b1461708a977 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2234,7 +2234,28 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
-bool bpf_helper_changes_skb_data(void *func)
+BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
+{
+ void *data = xdp->data + offset;
+
+ if (unlikely(data < xdp->data_hard_start ||
+ data > xdp->data_end - ETH_HLEN))
+ return -EINVAL;
+
+ xdp->data = data;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
+ .func = bpf_xdp_adjust_head,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+bool bpf_helper_changes_pkt_data(void *func)
{
if (func == bpf_skb_vlan_push ||
func == bpf_skb_vlan_pop ||
@@ -2244,7 +2265,8 @@ bool bpf_helper_changes_skb_data(void *func)
func == bpf_skb_change_tail ||
func == bpf_skb_pull_data ||
func == bpf_l3_csum_replace ||
- func == bpf_l4_csum_replace)
+ func == bpf_l4_csum_replace ||
+ func == bpf_xdp_adjust_head)
return true;
return false;
@@ -2670,6 +2692,8 @@ xdp_func_proto(enum bpf_func_id func_id)
return &bpf_xdp_event_output_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
+ case BPF_FUNC_xdp_adjust_head:
+ return &bpf_xdp_adjust_head_proto;
default:
return sk_filter_func_proto(func_id);
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1eb6f949e5b2..d6447dc10371 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -58,6 +58,28 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
EXPORT_SYMBOL(skb_flow_dissector_init);
/**
+ * skb_flow_get_be16 - extract be16 entity
+ * @skb: sk_buff to extract from
+ * @poff: offset to extract at
+ * @data: raw buffer pointer to the packet
+ * @hlen: packet header length
+ *
+ * The function will try to retrieve a be32 entity at
+ * offset poff
+ */
+__be16 skb_flow_get_be16(const struct sk_buff *skb, int poff, void *data,
+ int hlen)
+{
+ __be16 *u, _u;
+
+ u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
+ if (u)
+ return *u;
+
+ return 0;
+}
+
+/**
* __skb_flow_get_ports - extract the upper layer ports and return them
* @skb: sk_buff to extract the ports from
* @thoff: transport header offset
@@ -117,6 +139,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports;
+ struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
struct flow_dissector_key_keyid *key_keyid;
@@ -546,6 +569,14 @@ ip_proto_again:
data, hlen);
}
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ICMP)) {
+ key_icmp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ICMP,
+ target_container);
+ key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
+ }
+
out_good:
ret = true;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b45cd1494243..84151cf40aeb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4931,3 +4931,31 @@ struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
return clone;
}
EXPORT_SYMBOL(pskb_extract);
+
+/**
+ * skb_condense - try to get rid of fragments/frag_list if possible
+ * @skb: buffer
+ *
+ * Can be used to save memory before skb is added to a busy queue.
+ * If packet has bytes in frags and enough tail room in skb->head,
+ * pull all of them, so that we can free the frags right now and adjust
+ * truesize.
+ * Notes:
+ * We do not reallocate skb->head thus can not fail.
+ * Caller must re-evaluate skb->truesize if needed.
+ */
+void skb_condense(struct sk_buff *skb)
+{
+ if (!skb->data_len ||
+ skb->data_len > skb->end - skb->tail ||
+ skb_cloned(skb))
+ return;
+
+ /* Nice, we can free page frag(s) right now */
+ __pskb_pull_tail(skb, skb->data_len);
+
+ /* Now adjust skb->truesize, since __pskb_pull_tail() does
+ * not do this.
+ */
+ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 0df2aa652530..2a46e4009f62 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -79,10 +79,13 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
if (sock_table != orig_sock_table) {
rcu_assign_pointer(rps_sock_flow_table, sock_table);
- if (sock_table)
+ if (sock_table) {
static_key_slow_inc(&rps_needed);
+ static_key_slow_inc(&rfs_needed);
+ }
if (orig_sock_table) {
static_key_slow_dec(&rps_needed);
+ static_key_slow_dec(&rfs_needed);
synchronize_rcu();
vfree(orig_sock_table);
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 691146abde2d..f79d7a8ab1c6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1047,12 +1047,12 @@ int icmp_rcv(struct sk_buff *skb)
if (success) {
consume_skb(skb);
- return 0;
+ return NET_RX_SUCCESS;
}
drop:
kfree_skb(skb);
- return 0;
+ return NET_RX_DROP;
csum_error:
__ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
error:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 16d88ba9ff1c..f5628ada47b5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1199,7 +1199,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
{
struct sk_buff_head *list = &sk->sk_receive_queue;
int rmem, delta, amt, err = -ENOMEM;
- int size = skb->truesize;
+ int size;
/* try to avoid the costly atomic add/sub pair when the receive
* queue is full; always allow at least a packet
@@ -1208,6 +1208,16 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
if (rmem > sk->sk_rcvbuf)
goto drop;
+ /* Under mem pressure, it might be helpful to help udp_recvmsg()
+ * having linear skbs :
+ * - Reduce memory overhead and thus increase receive queue capacity
+ * - Less cache line misses at copyout() time
+ * - Less work at consume_skb() (less alien page frag freeing)
+ */
+ if (rmem > (sk->sk_rcvbuf >> 1))
+ skb_condense(skb);
+ size = skb->truesize;
+
/* we drop only if the receive buf is full and the receive
* queue contains some other skb
*/
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 29a9e6d9f274..e040c5140f61 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -39,6 +39,7 @@ struct fl_flow_key {
struct flow_dissector_key_ipv6_addrs ipv6;
};
struct flow_dissector_key_ports tp;
+ struct flow_dissector_key_icmp icmp;
struct flow_dissector_key_keyid enc_key_id;
union {
struct flow_dissector_key_ipv4_addrs enc_ipv4;
@@ -386,6 +387,16 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_FLAGS] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_FLAGS_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ICMPV4_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV4_CODE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_CODE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -420,6 +431,39 @@ static void fl_set_key_vlan(struct nlattr **tb,
}
}
+static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
+ u32 *dissector_key, u32 *dissector_mask,
+ u32 flower_flag_bit, u32 dissector_flag_bit)
+{
+ if (flower_mask & flower_flag_bit) {
+ *dissector_mask |= dissector_flag_bit;
+ if (flower_key & flower_flag_bit)
+ *dissector_key |= dissector_flag_bit;
+ }
+}
+
+static void fl_set_key_flags(struct nlattr **tb,
+ u32 *flags_key, u32 *flags_mask)
+{
+ u32 key, mask;
+
+ if (!tb[TCA_FLOWER_KEY_FLAGS])
+ return;
+
+ key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
+
+ if (!tb[TCA_FLOWER_KEY_FLAGS_MASK])
+ mask = ~0;
+ else
+ mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
+
+ *flags_key = 0;
+ *flags_mask = 0;
+
+ fl_set_key_flag(key, mask, flags_key, flags_mask,
+ TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+}
+
static int fl_set_key(struct net *net, struct nlattr **tb,
struct fl_flow_key *key, struct fl_flow_key *mask)
{
@@ -502,6 +546,26 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
&mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
sizeof(key->tp.dst));
+ } else if (key->basic.n_proto == htons(ETH_P_IP) &&
+ key->basic.ip_proto == IPPROTO_ICMP) {
+ fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV4_TYPE,
+ &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
+ sizeof(key->icmp.type));
+ fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
+ &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+ sizeof(key->icmp.code));
+ } else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+ key->basic.ip_proto == IPPROTO_ICMPV6) {
+ fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV6_TYPE,
+ &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
+ sizeof(key->icmp.type));
+ fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
+ &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+ sizeof(key->icmp.code));
}
if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
@@ -546,6 +610,8 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
&mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
sizeof(key->enc_tp.dst));
+ fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
+
return 0;
}
@@ -612,6 +678,8 @@ static void fl_init_dissector(struct cls_fl_head *head,
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_PORTS, tp);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ICMP, icmp);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_VLAN, vlan);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
@@ -880,6 +948,42 @@ static int fl_dump_key_vlan(struct sk_buff *skb,
return 0;
}
+static void fl_get_key_flag(u32 dissector_key, u32 dissector_mask,
+ u32 *flower_key, u32 *flower_mask,
+ u32 flower_flag_bit, u32 dissector_flag_bit)
+{
+ if (dissector_mask & dissector_flag_bit) {
+ *flower_mask |= flower_flag_bit;
+ if (dissector_key & dissector_flag_bit)
+ *flower_key |= flower_flag_bit;
+ }
+}
+
+static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
+{
+ u32 key, mask;
+ __be32 _key, _mask;
+ int err;
+
+ if (!memchr_inv(&flags_mask, 0, sizeof(flags_mask)))
+ return 0;
+
+ key = 0;
+ mask = 0;
+
+ fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+ TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+
+ _key = cpu_to_be32(key);
+ _mask = cpu_to_be32(mask);
+
+ err = nla_put(skb, TCA_FLOWER_KEY_FLAGS, 4, &_key);
+ if (err)
+ return err;
+
+ return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
+}
+
static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
@@ -977,6 +1081,28 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
&mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
sizeof(key->tp.dst))))
goto nla_put_failure;
+ else if (key->basic.n_proto == htons(ETH_P_IP) &&
+ key->basic.ip_proto == IPPROTO_ICMP &&
+ (fl_dump_key_val(skb, &key->icmp.type,
+ TCA_FLOWER_KEY_ICMPV4_TYPE, &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
+ sizeof(key->icmp.type)) ||
+ fl_dump_key_val(skb, &key->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE, &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+ sizeof(key->icmp.code))))
+ goto nla_put_failure;
+ else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+ key->basic.ip_proto == IPPROTO_ICMPV6 &&
+ (fl_dump_key_val(skb, &key->icmp.type,
+ TCA_FLOWER_KEY_ICMPV6_TYPE, &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
+ sizeof(key->icmp.type)) ||
+ fl_dump_key_val(skb, &key->icmp.code,
+ TCA_FLOWER_KEY_ICMPV6_CODE, &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
+ sizeof(key->icmp.code))))
+ goto nla_put_failure;
if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
(fl_dump_key_val(skb, &key->enc_ipv4.src,
@@ -1015,6 +1141,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
sizeof(key->enc_tp.dst)))
goto nla_put_failure;
+ if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
+ goto nla_put_failure;
+
nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
if (tcf_exts_dump(skb, &f->exts))