diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/core/dev.c | 2 | ||||
-rw-r--r-- | net/core/filter.c | 28 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 31 | ||||
-rw-r--r-- | net/core/skbuff.c | 28 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 5 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 4 | ||||
-rw-r--r-- | net/ipv4/udp.c | 12 | ||||
-rw-r--r-- | net/sched/cls_flower.c | 129 |
8 files changed, 233 insertions, 6 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index bffb5253e778..1d33ce03365f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3447,6 +3447,8 @@ EXPORT_SYMBOL(rps_cpu_mask); struct static_key rps_needed __read_mostly; EXPORT_SYMBOL(rps_needed); +struct static_key rfs_needed __read_mostly; +EXPORT_SYMBOL(rfs_needed); static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, diff --git a/net/core/filter.c b/net/core/filter.c index b751202e12f8..b1461708a977 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2234,7 +2234,28 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = { .arg3_type = ARG_ANYTHING, }; -bool bpf_helper_changes_skb_data(void *func) +BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) +{ + void *data = xdp->data + offset; + + if (unlikely(data < xdp->data_hard_start || + data > xdp->data_end - ETH_HLEN)) + return -EINVAL; + + xdp->data = data; + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { + .func = bpf_xdp_adjust_head, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + +bool bpf_helper_changes_pkt_data(void *func) { if (func == bpf_skb_vlan_push || func == bpf_skb_vlan_pop || @@ -2244,7 +2265,8 @@ bool bpf_helper_changes_skb_data(void *func) func == bpf_skb_change_tail || func == bpf_skb_pull_data || func == bpf_l3_csum_replace || - func == bpf_l4_csum_replace) + func == bpf_l4_csum_replace || + func == bpf_xdp_adjust_head) return true; return false; @@ -2670,6 +2692,8 @@ xdp_func_proto(enum bpf_func_id func_id) return &bpf_xdp_event_output_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_xdp_adjust_head: + return &bpf_xdp_adjust_head_proto; default: return sk_filter_func_proto(func_id); } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1eb6f949e5b2..d6447dc10371 100644 --- a/net/core/flow_dissector.c +++ 
b/net/core/flow_dissector.c @@ -58,6 +58,28 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, EXPORT_SYMBOL(skb_flow_dissector_init); /** + * skb_flow_get_be16 - extract be16 entity + * @skb: sk_buff to extract from + * @poff: offset to extract at + * @data: raw buffer pointer to the packet + * @hlen: packet header length + * + * The function will try to retrieve a be16 entity at + * offset poff + */ +__be16 skb_flow_get_be16(const struct sk_buff *skb, int poff, void *data, + int hlen) +{ + __be16 *u, _u; + + u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u); + if (u) + return *u; + + return 0; +} + +/** * __skb_flow_get_ports - extract the upper layer ports and return them * @skb: sk_buff to extract the ports from * @thoff: transport header offset @@ -117,6 +139,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; + struct flow_dissector_key_icmp *key_icmp; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; struct flow_dissector_key_keyid *key_keyid; @@ -546,6 +569,14 @@ ip_proto_again: data, hlen); } + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ICMP)) { + key_icmp = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ICMP, + target_container); + key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen); + } + out_good: ret = true; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b45cd1494243..84151cf40aeb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4931,3 +4931,31 @@ struct sk_buff *pskb_extract(struct sk_buff *skb, int off, return clone; } EXPORT_SYMBOL(pskb_extract); + +/** + * skb_condense - try to get rid of fragments/frag_list if possible + * @skb: buffer + * + * Can be used to save memory before skb is added to a busy queue. 
+ * If packet has bytes in frags and enough tail room in skb->head, + * pull all of them, so that we can free the frags right now and adjust + * truesize. + * Notes: + * We do not reallocate skb->head thus can not fail. + * Caller must re-evaluate skb->truesize if needed. + */ +void skb_condense(struct sk_buff *skb) +{ + if (!skb->data_len || + skb->data_len > skb->end - skb->tail || + skb_cloned(skb)) + return; + + /* Nice, we can free page frag(s) right now */ + __pskb_pull_tail(skb, skb->data_len); + + /* Now adjust skb->truesize, since __pskb_pull_tail() does + * not do this. + */ + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); +} diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0df2aa652530..2a46e4009f62 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -79,10 +79,13 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, if (sock_table != orig_sock_table) { rcu_assign_pointer(rps_sock_flow_table, sock_table); - if (sock_table) + if (sock_table) { static_key_slow_inc(&rps_needed); + static_key_slow_inc(&rfs_needed); + } if (orig_sock_table) { static_key_slow_dec(&rps_needed); + static_key_slow_dec(&rfs_needed); synchronize_rcu(); vfree(orig_sock_table); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 691146abde2d..f79d7a8ab1c6 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1047,12 +1047,12 @@ int icmp_rcv(struct sk_buff *skb) if (success) { consume_skb(skb); - return 0; + return NET_RX_SUCCESS; } drop: kfree_skb(skb); - return 0; + return NET_RX_DROP; csum_error: __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS); error: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 16d88ba9ff1c..f5628ada47b5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1199,7 +1199,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) { struct sk_buff_head *list = &sk->sk_receive_queue; int rmem, delta, amt, err = -ENOMEM; - int size = skb->truesize; + int size; /* try to avoid the 
costly atomic add/sub pair when the receive * queue is full; always allow at least a packet @@ -1208,6 +1208,16 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) if (rmem > sk->sk_rcvbuf) goto drop; + /* Under mem pressure, it might be helpful to help udp_recvmsg() + * having linear skbs : + * - Reduce memory overhead and thus increase receive queue capacity + * - Less cache line misses at copyout() time + * - Less work at consume_skb() (less alien page frag freeing) + */ + if (rmem > (sk->sk_rcvbuf >> 1)) + skb_condense(skb); + size = skb->truesize; + /* we drop only if the receive buf is full and the receive * queue contains some other skb */ diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 29a9e6d9f274..e040c5140f61 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -39,6 +39,7 @@ struct fl_flow_key { struct flow_dissector_key_ipv6_addrs ipv6; }; struct flow_dissector_key_ports tp; + struct flow_dissector_key_icmp icmp; struct flow_dissector_key_keyid enc_key_id; union { struct flow_dissector_key_ipv4_addrs enc_ipv4; @@ -386,6 +387,16 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_FLAGS] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_FLAGS_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ICMPV4_TYPE] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ICMPV4_CODE] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ICMPV6_TYPE] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ICMPV6_CODE] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -420,6 +431,39 @@ static void 
fl_set_key_vlan(struct nlattr **tb, } } +static void fl_set_key_flag(u32 flower_key, u32 flower_mask, + u32 *dissector_key, u32 *dissector_mask, + u32 flower_flag_bit, u32 dissector_flag_bit) +{ + if (flower_mask & flower_flag_bit) { + *dissector_mask |= dissector_flag_bit; + if (flower_key & flower_flag_bit) + *dissector_key |= dissector_flag_bit; + } +} + +static void fl_set_key_flags(struct nlattr **tb, + u32 *flags_key, u32 *flags_mask) +{ + u32 key, mask; + + if (!tb[TCA_FLOWER_KEY_FLAGS]) + return; + + key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS])); + + if (!tb[TCA_FLOWER_KEY_FLAGS_MASK]) + mask = ~0; + else + mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK])); + + *flags_key = 0; + *flags_mask = 0; + + fl_set_key_flag(key, mask, flags_key, flags_mask, + TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT); +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask) { @@ -502,6 +546,26 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST, &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK, sizeof(key->tp.dst)); + } else if (key->basic.n_proto == htons(ETH_P_IP) && + key->basic.ip_proto == IPPROTO_ICMP) { + fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV4_TYPE, + &mask->icmp.type, + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK, + sizeof(key->icmp.type)); + fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE, + &mask->icmp.code, + TCA_FLOWER_KEY_ICMPV4_CODE_MASK, + sizeof(key->icmp.code)); + } else if (key->basic.n_proto == htons(ETH_P_IPV6) && + key->basic.ip_proto == IPPROTO_ICMPV6) { + fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV6_TYPE, + &mask->icmp.type, + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK, + sizeof(key->icmp.type)); + fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV6_CODE, + &mask->icmp.code, + TCA_FLOWER_KEY_ICMPV6_CODE_MASK, + sizeof(key->icmp.code)); } if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || 
@@ -546,6 +610,8 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, sizeof(key->enc_tp.dst)); + fl_set_key_flags(tb, &key->control.flags, &mask->control.flags); + return 0; } @@ -612,6 +678,8 @@ static void fl_init_dissector(struct cls_fl_head *head, FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ICMP, icmp); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_VLAN, vlan); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id); @@ -880,6 +948,42 @@ static int fl_dump_key_vlan(struct sk_buff *skb, return 0; } +static void fl_get_key_flag(u32 dissector_key, u32 dissector_mask, + u32 *flower_key, u32 *flower_mask, + u32 flower_flag_bit, u32 dissector_flag_bit) +{ + if (dissector_mask & dissector_flag_bit) { + *flower_mask |= flower_flag_bit; + if (dissector_key & dissector_flag_bit) + *flower_key |= flower_flag_bit; + } +} + +static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) +{ + u32 key, mask; + __be32 _key, _mask; + int err; + + if (!memchr_inv(&flags_mask, 0, sizeof(flags_mask))) + return 0; + + key = 0; + mask = 0; + + fl_get_key_flag(flags_key, flags_mask, &key, &mask, + TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT); + + _key = cpu_to_be32(key); + _mask = cpu_to_be32(mask); + + err = nla_put(skb, TCA_FLOWER_KEY_FLAGS, 4, &_key); + if (err) + return err; + + return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask); +} + static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { @@ -977,6 +1081,28 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; + else if (key->basic.n_proto == htons(ETH_P_IP) && + key->basic.ip_proto == IPPROTO_ICMP && + 
(fl_dump_key_val(skb, &key->icmp.type, + TCA_FLOWER_KEY_ICMPV4_TYPE, &mask->icmp.type, + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK, + sizeof(key->icmp.type)) || + fl_dump_key_val(skb, &key->icmp.code, + TCA_FLOWER_KEY_ICMPV4_CODE, &mask->icmp.code, + TCA_FLOWER_KEY_ICMPV4_CODE_MASK, + sizeof(key->icmp.code)))) + goto nla_put_failure; + else if (key->basic.n_proto == htons(ETH_P_IPV6) && + key->basic.ip_proto == IPPROTO_ICMPV6 && + (fl_dump_key_val(skb, &key->icmp.type, + TCA_FLOWER_KEY_ICMPV6_TYPE, &mask->icmp.type, + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK, + sizeof(key->icmp.type)) || + fl_dump_key_val(skb, &key->icmp.code, + TCA_FLOWER_KEY_ICMPV6_CODE, &mask->icmp.code, + TCA_FLOWER_KEY_ICMPV6_CODE_MASK, + sizeof(key->icmp.code)))) + goto nla_put_failure; if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && (fl_dump_key_val(skb, &key->enc_ipv4.src, @@ -1015,6 +1141,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, sizeof(key->enc_tp.dst))) goto nla_put_failure; + if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags)) + goto nla_put_failure; + nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); if (tcf_exts_dump(skb, &f->exts)) |