summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/core.c12
-rw-r--r--net/6lowpan/iphc.c57
-rw-r--r--net/8021q/vlan_dev.c3
-rw-r--r--net/8021q/vlan_netlink.c3
-rw-r--r--net/Makefile2
-rw-r--r--net/atm/clip.c4
-rw-r--r--net/atm/common.c22
-rw-r--r--net/batman-adv/bat_iv_ogm.c17
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c123
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h11
-rw-r--r--net/batman-adv/distributed-arp-table.c64
-rw-r--r--net/batman-adv/log.h5
-rw-r--r--net/batman-adv/main.c3
-rw-r--r--net/batman-adv/main.h18
-rw-r--r--net/batman-adv/multicast.c12
-rw-r--r--net/batman-adv/routing.c25
-rw-r--r--net/batman-adv/send.c76
-rw-r--r--net/batman-adv/send.h4
-rw-r--r--net/batman-adv/soft-interface.c238
-rw-r--r--net/batman-adv/tp_meter.c7
-rw-r--r--net/batman-adv/translation-table.c42
-rw-r--r--net/batman-adv/types.h6
-rw-r--r--net/bluetooth/6lowpan.c192
-rw-r--r--net/bluetooth/af_bluetooth.c26
-rw-r--r--net/bluetooth/amp.c10
-rw-r--r--net/bluetooth/hci_core.c4
-rw-r--r--net/bluetooth/l2cap_core.c30
-rw-r--r--net/bluetooth/rfcomm/core.c4
-rw-r--r--net/bpf/Makefile1
-rw-r--r--net/bpf/test_run.c172
-rw-r--r--net/bridge/br_fdb.c7
-rw-r--r--net/bridge/br_if.c1
-rw-r--r--net/bridge/br_mdb.c9
-rw-r--r--net/bridge/br_netfilter_hooks.c3
-rw-r--r--net/bridge/br_netlink.c4
-rw-r--r--net/bridge/br_netlink_tunnel.c4
-rw-r--r--net/bridge/netfilter/ebt_log.c34
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c6
-rw-r--r--net/can/af_can.c183
-rw-r--r--net/can/af_can.h13
-rw-r--r--net/can/bcm.c93
-rw-r--r--net/can/gw.c80
-rw-r--r--net/can/proc.c285
-rw-r--r--net/can/raw.c92
-rw-r--r--net/core/datagram.c8
-rw-r--r--net/core/dev.c203
-rw-r--r--net/core/devlink.c862
-rw-r--r--net/core/drop_monitor.c5
-rw-r--r--net/core/ethtool.c4
-rw-r--r--net/core/fib_rules.c24
-rw-r--r--net/core/filter.c167
-rw-r--r--net/core/flow.c29
-rw-r--r--net/core/flow_dissector.c445
-rw-r--r--net/core/gro_cells.c2
-rw-r--r--net/core/lwt_bpf.c5
-rw-r--r--net/core/lwtunnel.c4
-rw-r--r--net/core/neighbour.c49
-rw-r--r--net/core/net_namespace.c10
-rw-r--r--net/core/netprio_cgroup.c1
-rw-r--r--net/core/rtnetlink.c143
-rw-r--r--net/core/secure_seq.c13
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/core/sock.c154
-rw-r--r--net/core/sock_diag.c15
-rw-r--r--net/core/sock_reuseport.c4
-rw-r--r--net/core/sysctl_net_core.c8
-rw-r--r--net/core/utils.c2
-rw-r--r--net/dcb/dcbnl.c60
-rw-r--r--net/decnet/af_decnet.c13
-rw-r--r--net/decnet/dn_dev.c12
-rw-r--r--net/decnet/dn_fib.c12
-rw-r--r--net/decnet/dn_route.c6
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c2
-rw-r--r--net/dsa/Kconfig8
-rw-r--r--net/dsa/Makefile4
-rw-r--r--net/dsa/dsa.c795
-rw-r--r--net/dsa/dsa2.c50
-rw-r--r--net/dsa/dsa_priv.h15
-rw-r--r--net/dsa/legacy.c818
-rw-r--r--net/dsa/slave.c13
-rw-r--r--net/dsa/switch.c15
-rw-r--r--net/dsa/tag_brcm.c27
-rw-r--r--net/dsa/tag_dsa.c27
-rw-r--r--net/dsa/tag_edsa.c27
-rw-r--r--net/dsa/tag_lan9303.c136
-rw-r--r--net/dsa/tag_mtk.c100
-rw-r--r--net/dsa/tag_qca.c27
-rw-r--r--net/dsa/tag_trailer.c26
-rw-r--r--net/hsr/hsr_netlink.c4
-rw-r--r--net/ieee802154/nl802154.c29
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/arp.c6
-rw-r--r--net/ipv4/devinet.c117
-rw-r--r--net/ipv4/esp4.c370
-rw-r--r--net/ipv4/esp4_offload.c231
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/fib_notifier.c86
-rw-r--r--net/ipv4/fib_rules.c55
-rw-r--r--net/ipv4/fib_semantics.c11
-rw-r--r--net/ipv4/fib_trie.c108
-rw-r--r--net/ipv4/icmp.c19
-rw-r--r--net/ipv4/ip_gre.c24
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/ip_tunnel.c27
-rw-r--r--net/ipv4/ip_tunnel_core.c5
-rw-r--r--net/ipv4/ip_vti.c20
-rw-r--r--net/ipv4/ipconfig.c1
-rw-r--r--net/ipv4/ipip.c24
-rw-r--r--net/ipv4/ipmr.c23
-rw-r--r--net/ipv4/netfilter/arp_tables.c2
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c19
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c15
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c4
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/protocol.c2
-rw-r--r--net/ipv4/route.c123
-rw-r--r--net/ipv4/sysctl_net_ipv4.c107
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_cubic.c2
-rw-r--r--net/ipv4/tcp_fastopen.c102
-rw-r--r--net/ipv4/tcp_input.c157
-rw-r--r--net/ipv4/tcp_ipv4.c46
-rw-r--r--net/ipv4/tcp_metrics.c147
-rw-r--r--net/ipv4/tcp_minisocks.c26
-rw-r--r--net/ipv4/tcp_output.c20
-rw-r--r--net/ipv4/tcp_rate.c7
-rw-r--r--net/ipv4/tcp_recovery.c18
-rw-r--r--net/ipv4/tcp_timer.c7
-rw-r--r--net/ipv4/tcp_westwood.c4
-rw-r--r--net/ipv4/xfrm4_mode_transport.c34
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c28
-rw-r--r--net/ipv4/xfrm4_output.c3
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c226
-rw-r--r--net/ipv6/addrlabel.c12
-rw-r--r--net/ipv6/af_inet6.c8
-rw-r--r--net/ipv6/esp6.c292
-rw-r--r--net/ipv6/esp6_offload.c233
-rw-r--r--net/ipv6/ila/ila_lwt.c3
-rw-r--r--net/ipv6/ip6_gre.c14
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_tunnel.c15
-rw-r--r--net/ipv6/ip6_vti.c10
-rw-r--r--net/ipv6/ip6mr.c9
-rw-r--r--net/ipv6/mcast.c49
-rw-r--r--net/ipv6/ndisc.c6
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c2
-rw-r--r--net/ipv6/protocol.c2
-rw-r--r--net/ipv6/route.c15
-rw-r--r--net/ipv6/seg6_iptunnel.c51
-rw-r--r--net/ipv6/sit.c37
-rw-r--r--net/ipv6/tcp_ipv6.c37
-rw-r--r--net/ipv6/udp.c68
-rw-r--r--net/ipv6/xfrm6_mode_transport.c34
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c27
-rw-r--r--net/ipv6/xfrm6_output.c9
-rw-r--r--net/kcm/kcmsock.c4
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/l2tp/l2tp_core.c62
-rw-r--r--net/l2tp/l2tp_core.h12
-rw-r--r--net/l2tp/l2tp_eth.c70
-rw-r--r--net/l2tp/l2tp_netlink.c5
-rw-r--r--net/mac80211/agg-rx.c12
-rw-r--r--net/mac80211/agg-tx.c12
-rw-r--r--net/mac80211/cfg.c203
-rw-r--r--net/mac80211/ibss.c4
-rw-r--r--net/mac80211/ieee80211_i.h3
-rw-r--r--net/mac80211/iface.c19
-rw-r--r--net/mac80211/main.c1
-rw-r--r--net/mac80211/mesh.c10
-rw-r--r--net/mac80211/mesh_hwmp.c23
-rw-r--r--net/mac80211/mesh_pathtbl.c8
-rw-r--r--net/mac80211/mlme.c41
-rw-r--r--net/mac80211/rate.c43
-rw-r--r--net/mac80211/rate.h3
-rw-r--r--net/mac80211/rx.c50
-rw-r--r--net/mac80211/spectmgmt.c4
-rw-r--r--net/mac80211/sta_info.c11
-rw-r--r--net/mac80211/sta_info.h18
-rw-r--r--net/mac80211/tx.c8
-rw-r--r--net/mac80211/util.c22
-rw-r--r--net/mac802154/ieee802154_i.h1
-rw-r--r--net/mpls/af_mpls.c367
-rw-r--r--net/mpls/internal.h68
-rw-r--r--net/mpls/mpls_iptunnel.c88
-rw-r--r--net/netfilter/ipset/ip_set_core.c29
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c24
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c24
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c57
-rw-r--r--net/netfilter/nf_conntrack_expect.c10
-rw-r--r--net/netfilter/nf_conntrack_netlink.c31
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c6
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c3
-rw-r--r--net/netfilter/nf_nat_core.c5
-rw-r--r--net/netfilter/nf_tables_api.c76
-rw-r--r--net/netfilter/nfnetlink.c33
-rw-r--r--net/netfilter/nfnetlink_acct.c18
-rw-r--r--net/netfilter/nfnetlink_cthelper.c12
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c15
-rw-r--r--net/netfilter/nfnetlink_log.c14
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/netfilter/nft_compat.c10
-rw-r--r--net/netfilter/nft_counter.c3
-rw-r--r--net/netfilter/nft_ct.c171
-rw-r--r--net/netfilter/nft_dynset.c14
-rw-r--r--net/netfilter/nft_exthdr.c13
-rw-r--r--net/netfilter/nft_fib.c16
-rw-r--r--net/netfilter/nft_hash.c133
-rw-r--r--net/netfilter/nft_limit.c10
-rw-r--r--net/netfilter/nft_lookup.c14
-rw-r--r--net/netfilter/nft_masq.c4
-rw-r--r--net/netfilter/nft_meta.c4
-rw-r--r--net/netfilter/nft_nat.c4
-rw-r--r--net/netfilter/nft_objref.c14
-rw-r--r--net/netfilter/nft_quota.c3
-rw-r--r--net/netfilter/nft_redir.c4
-rw-r--r--net/netfilter/nft_reject.c5
-rw-r--r--net/netfilter/nft_reject_inet.c6
-rw-r--r--net/netfilter/nft_set_rbtree.c31
-rw-r--r--net/netfilter/xt_limit.c11
-rw-r--r--net/netlabel/netlabel_cipso_v4.c19
-rw-r--r--net/netlink/af_netlink.c90
-rw-r--r--net/netlink/af_netlink.h9
-rw-r--r--net/netlink/diag.c25
-rw-r--r--net/netlink/genetlink.c11
-rw-r--r--net/nfc/netlink.c29
-rw-r--r--net/openvswitch/actions.c271
-rw-r--r--net/openvswitch/conntrack.c29
-rw-r--r--net/openvswitch/datapath.c2
-rw-r--r--net/openvswitch/datapath.h2
-rw-r--r--net/openvswitch/flow_netlink.c145
-rw-r--r--net/openvswitch/vport-vxlan.c3
-rw-r--r--net/packet/af_packet.c44
-rw-r--r--net/phonet/pn_netlink.c12
-rw-r--r--net/qrtr/Kconfig2
-rw-r--r--net/qrtr/qrtr.c5
-rw-r--r--net/qrtr/smd.c42
-rw-r--r--net/rds/connection.c10
-rw-r--r--net/rds/ib_cm.c5
-rw-r--r--net/rds/ib_fmr.c38
-rw-r--r--net/rds/ib_mr.h2
-rw-r--r--net/rds/threads.c2
-rw-r--r--net/rxrpc/ar-internal.h19
-rw-r--r--net/rxrpc/call_accept.c6
-rw-r--r--net/rxrpc/call_event.c2
-rw-r--r--net/rxrpc/call_object.c4
-rw-r--r--net/rxrpc/conn_client.c1
-rw-r--r--net/rxrpc/conn_event.c17
-rw-r--r--net/rxrpc/input.c17
-rw-r--r--net/rxrpc/insecure.c10
-rw-r--r--net/rxrpc/peer_event.c2
-rw-r--r--net/rxrpc/recvmsg.c8
-rw-r--r--net/rxrpc/rxkad.c184
-rw-r--r--net/rxrpc/sendmsg.c17
-rw-r--r--net/sched/Kconfig45
-rw-r--r--net/sched/act_api.c48
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_connmark.c3
-rw-r--r--net/sched/act_csum.c14
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_ife.c8
-rw-r--r--net/sched/act_ipt.c2
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c4
-rw-r--r--net/sched/act_police.c2
-rw-r--r--net/sched/act_sample.c2
-rw-r--r--net/sched/act_simple.c2
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/act_tunnel_key.c3
-rw-r--r--net/sched/act_vlan.c2
-rw-r--r--net/sched/cls_api.c32
-rw-r--r--net/sched/cls_basic.c12
-rw-r--r--net/sched/cls_bpf.c14
-rw-r--r--net/sched/cls_cgroup.c10
-rw-r--r--net/sched/cls_flow.c17
-rw-r--r--net/sched/cls_flower.c87
-rw-r--r--net/sched/cls_fw.c32
-rw-r--r--net/sched/cls_matchall.c11
-rw-r--r--net/sched/cls_route.c46
-rw-r--r--net/sched/cls_rsvp.h38
-rw-r--r--net/sched/cls_tcindex.c16
-rw-r--r--net/sched/cls_u32.c73
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/sched/ematch.c2
-rw-r--r--net/sched/sch_api.c68
-rw-r--r--net/sched/sch_atm.c2
-rw-r--r--net/sched/sch_cbq.c9
-rw-r--r--net/sched/sch_choke.c59
-rw-r--r--net/sched/sch_codel.c2
-rw-r--r--net/sched/sch_drr.c4
-rw-r--r--net/sched/sch_dsmark.c6
-rw-r--r--net/sched/sch_fq.c2
-rw-r--r--net/sched/sch_fq_codel.c5
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sched/sch_gred.c4
-rw-r--r--net/sched/sch_hfsc.c6
-rw-r--r--net/sched/sch_hhf.c2
-rw-r--r--net/sched/sch_htb.c6
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c41
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_netem.c28
-rw-r--r--net/sched/sch_pie.c2
-rw-r--r--net/sched/sch_prio.c5
-rw-r--r--net/sched/sch_qfq.c5
-rw-r--r--net/sched/sch_red.c4
-rw-r--r--net/sched/sch_sfb.c4
-rw-r--r--net/sched/sch_sfq.c5
-rw-r--r--net/sched/sch_tbf.c4
-rw-r--r--net/sctp/chunk.c14
-rw-r--r--net/sctp/outqueue.c10
-rw-r--r--net/sctp/sm_statefuns.c15
-rw-r--r--net/sctp/socket.c149
-rw-r--r--net/sctp/stream.c464
-rw-r--r--net/sctp/sysctl.c7
-rw-r--r--net/sctp/ulpevent.c56
-rw-r--r--net/smc/af_smc.c23
-rw-r--r--net/smc/smc.h1
-rw-r--r--net/smc/smc_cdc.c11
-rw-r--r--net/smc/smc_close.c74
-rw-r--r--net/smc/smc_close.h2
-rw-r--r--net/smc/smc_core.c2
-rw-r--r--net/smc/smc_ib.c3
-rw-r--r--net/smc/smc_ib.h1
-rw-r--r--net/smc/smc_pnet.c9
-rw-r--r--net/smc/smc_pnet.h1
-rw-r--r--net/smc/smc_rx.c3
-rw-r--r--net/smc/smc_tx.c6
-rw-r--r--net/smc/smc_wr.c2
-rw-r--r--net/socket.c46
-rw-r--r--net/switchdev/switchdev.c2
-rw-r--r--net/tipc/bearer.c14
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/name_table.c2
-rw-r--r--net/tipc/net.c4
-rw-r--r--net/tipc/netlink.c3
-rw-r--r--net/tipc/netlink_compat.c32
-rw-r--r--net/tipc/node.c14
-rw-r--r--net/tipc/socket.c30
-rw-r--r--net/tipc/subscr.c17
-rw-r--r--net/tipc/subscr.h3
-rw-r--r--net/tipc/udp_media.c7
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/vmw_vsock/Makefile2
-rw-r--r--net/vmw_vsock/af_vsock_tap.c114
-rw-r--r--net/vmw_vsock/virtio_transport.c3
-rw-r--r--net/vmw_vsock/virtio_transport_common.c64
-rw-r--r--net/vmw_vsock/vmci_transport.c22
-rw-r--r--net/wireless/ap.c5
-rw-r--r--net/wireless/chan.c117
-rw-r--r--net/wireless/core.c68
-rw-r--r--net/wireless/core.h47
-rw-r--r--net/wireless/ibss.c1
-rw-r--r--net/wireless/mesh.c1
-rw-r--r--net/wireless/mlme.c70
-rw-r--r--net/wireless/nl80211.c546
-rw-r--r--net/wireless/nl80211.h10
-rw-r--r--net/wireless/rdev-ops.h21
-rw-r--r--net/wireless/reg.c148
-rw-r--r--net/wireless/reg.h36
-rw-r--r--net/wireless/scan.c5
-rw-r--r--net/wireless/sme.c172
-rw-r--r--net/wireless/trace.h22
-rw-r--r--net/wireless/util.c92
-rw-r--r--net/wireless/wext-compat.c2
-rw-r--r--net/xfrm/Makefile1
-rw-r--r--net/xfrm/xfrm_device.c208
-rw-r--r--net/xfrm/xfrm_hash.h4
-rw-r--r--net/xfrm/xfrm_input.c41
-rw-r--r--net/xfrm/xfrm_output.c46
-rw-r--r--net/xfrm/xfrm_policy.c27
-rw-r--r--net/xfrm/xfrm_replay.c162
-rw-r--r--net/xfrm/xfrm_state.c147
-rw-r--r--net/xfrm/xfrm_user.c38
389 files changed, 11685 insertions, 5116 deletions
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index 5945f7e19c67..40d3d72beb53 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -23,10 +23,18 @@ int lowpan_register_netdevice(struct net_device *dev,
{
int i, ret;
- dev->addr_len = EUI64_ADDR_LEN;
+ switch (lltype) {
+ case LOWPAN_LLTYPE_IEEE802154:
+ dev->addr_len = EUI64_ADDR_LEN;
+ break;
+
+ case LOWPAN_LLTYPE_BTLE:
+ dev->addr_len = ETH_ALEN;
+ break;
+ }
+
dev->type = ARPHRD_6LOWPAN;
dev->mtu = IPV6_MIN_MTU;
- dev->priv_flags |= IFF_NO_QUEUE;
lowpan_dev(dev)->lltype = lltype;
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 79f1fa22509a..6b1042e21656 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -278,6 +278,23 @@ lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev,
return ret;
}
+static void lowpan_iphc_uncompress_lladdr(const struct net_device *dev,
+ struct in6_addr *ipaddr,
+ const void *lladdr)
+{
+ switch (dev->addr_len) {
+ case ETH_ALEN:
+ lowpan_iphc_uncompress_eui48_lladdr(ipaddr, lladdr);
+ break;
+ case EUI64_ADDR_LEN:
+ lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+
/* Uncompress address function for source and
* destination address(non-multicast).
*
@@ -320,7 +337,7 @@ static int lowpan_iphc_uncompress_addr(struct sk_buff *skb,
lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr);
break;
default:
- lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
+ lowpan_iphc_uncompress_lladdr(dev, ipaddr, lladdr);
break;
}
break;
@@ -381,7 +398,7 @@ static int lowpan_iphc_uncompress_ctx_addr(struct sk_buff *skb,
lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr);
break;
default:
- lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
+ lowpan_iphc_uncompress_lladdr(dev, ipaddr, lladdr);
break;
}
ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen);
@@ -666,6 +683,8 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
switch (iphc1 & (LOWPAN_IPHC_M | LOWPAN_IPHC_DAC)) {
case LOWPAN_IPHC_M | LOWPAN_IPHC_DAC:
+ skb->pkt_type = PACKET_BROADCAST;
+
spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid));
if (!ci) {
@@ -681,11 +700,15 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
break;
case LOWPAN_IPHC_M:
+ skb->pkt_type = PACKET_BROADCAST;
+
/* multicast */
err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr,
iphc1 & LOWPAN_IPHC_DAM_MASK);
break;
case LOWPAN_IPHC_DAC:
+ skb->pkt_type = PACKET_HOST;
+
spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid));
if (!ci) {
@@ -701,6 +724,8 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
break;
default:
+ skb->pkt_type = PACKET_HOST;
+
err = lowpan_iphc_uncompress_addr(skb, dev, &hdr.daddr,
iphc1 & LOWPAN_IPHC_DAM_MASK,
daddr);
@@ -802,6 +827,21 @@ lowpan_iphc_compress_ctx_802154_lladdr(const struct in6_addr *ipaddr,
return lladdr_compress;
}
+static bool lowpan_iphc_addr_equal(const struct net_device *dev,
+ const struct lowpan_iphc_ctx *ctx,
+ const struct in6_addr *ipaddr,
+ const void *lladdr)
+{
+ struct in6_addr tmp = {};
+
+ lowpan_iphc_uncompress_lladdr(dev, &tmp, lladdr);
+
+ if (ctx)
+ ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
+
+ return ipv6_addr_equal(&tmp, ipaddr);
+}
+
static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct net_device *dev,
const struct in6_addr *ipaddr,
const struct lowpan_iphc_ctx *ctx,
@@ -819,13 +859,7 @@ static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct net_device *dev,
}
break;
default:
- /* check for SAM/DAM = 11 */
- memcpy(&tmp.s6_addr[8], lladdr, EUI64_ADDR_LEN);
- /* second bit-flip (Universe/Local) is done according RFC2464 */
- tmp.s6_addr[8] ^= 0x02;
- /* context information are always used */
- ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
- if (ipv6_addr_equal(&tmp, ipaddr)) {
+ if (lowpan_iphc_addr_equal(dev, ctx, ipaddr, lladdr)) {
dam = LOWPAN_IPHC_DAM_11;
goto out;
}
@@ -921,11 +955,12 @@ static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct net_device *dev,
}
break;
default:
- if (is_addr_mac_addr_based(ipaddr, lladdr)) {
- dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
+ if (lowpan_iphc_addr_equal(dev, NULL, ipaddr, lladdr)) {
+ dam = LOWPAN_IPHC_DAM_11;
pr_debug("address compression 0 bits\n");
goto out;
}
+
break;
}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e97ab824e368..9ee5787634e5 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -562,8 +562,7 @@ static int vlan_dev_init(struct net_device *dev)
NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
NETIF_F_ALL_FCOE;
- dev->features |= real_dev->vlan_features | NETIF_F_LLTX |
- NETIF_F_GSO_SOFTWARE;
+ dev->features |= dev->hw_features | NETIF_F_LLTX;
dev->gso_max_size = real_dev->gso_max_size;
dev->gso_max_segs = real_dev->gso_max_segs;
if (dev->features & NETIF_F_VLAN_FEATURES)
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 1270207f3d7c..9c94aad153b3 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -35,7 +35,8 @@ static inline int vlan_validate_qos_map(struct nlattr *attr)
{
if (!attr)
return 0;
- return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy);
+ return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy,
+ NULL);
}
static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
diff --git a/net/Makefile b/net/Makefile
index 9b681550e3a3..9086ffbb5085 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -12,7 +12,7 @@ obj-$(CONFIG_NET) += $(tmp-y)
# LLC has to be linked before the files in net/802/
obj-$(CONFIG_LLC) += llc/
-obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/
+obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_XFRM) += xfrm/
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 53b4ac09e7b7..ec527b62f79d 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -106,7 +106,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
entry->expires = jiffies - 1;
/* force resolution or expiration */
error = neigh_update(entry->neigh, NULL, NUD_NONE,
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN, 0);
if (error)
pr_crit("neigh_update failed with %d\n", error);
goto out;
@@ -481,7 +481,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
link_vcc(clip_vcc, entry);
}
error = neigh_update(neigh, llc_oui, NUD_PERMANENT,
- NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, 0);
neigh_release(neigh);
return error;
}
diff --git a/net/atm/common.c b/net/atm/common.c
index 9613381f5db0..f06422f4108d 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -62,21 +62,16 @@ static void vcc_remove_socket(struct sock *sk)
write_unlock_irq(&vcc_sklist_lock);
}
-static struct sk_buff *alloc_tx(struct atm_vcc *vcc, unsigned int size)
+static bool vcc_tx_ready(struct atm_vcc *vcc, unsigned int size)
{
- struct sk_buff *skb;
struct sock *sk = sk_atm(vcc);
if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) {
pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n",
sk_wmem_alloc_get(sk), size, sk->sk_sndbuf);
- return NULL;
+ return false;
}
- while (!(skb = alloc_skb(size, GFP_KERNEL)))
- schedule();
- pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
- atomic_add(skb->truesize, &sk->sk_wmem_alloc);
- return skb;
+ return true;
}
static void vcc_sock_destruct(struct sock *sk)
@@ -606,7 +601,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
eff = (size+3) & ~3; /* align to word boundary */
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
error = 0;
- while (!(skb = alloc_tx(vcc, eff))) {
+ while (!vcc_tx_ready(vcc, eff)) {
if (m->msg_flags & MSG_DONTWAIT) {
error = -EAGAIN;
break;
@@ -628,6 +623,15 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
finish_wait(sk_sleep(sk), &wait);
if (error)
goto out;
+
+ skb = alloc_skb(eff, GFP_KERNEL);
+ if (!skb) {
+ error = -ENOMEM;
+ goto out;
+ }
+ pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
+ atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+
skb->dev = NULL; /* for paths shared with net_device interfaces */
ATM_SKB(skb)->atm_options = vcc->atm_options;
if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 71343d0fec94..495ba7cdcb04 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -679,15 +679,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_forw_packet *forw_packet_aggr;
+ struct sk_buff *skb;
unsigned char *skb_buff;
unsigned int skb_size;
atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left;
- forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing,
- queue_left, bat_priv);
- if (!forw_packet_aggr)
- return;
-
if (atomic_read(&bat_priv->aggregated_ogms) &&
packet_len < BATADV_MAX_AGGREGATION_BYTES)
skb_size = BATADV_MAX_AGGREGATION_BYTES;
@@ -696,9 +692,14 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
skb_size += ETH_HLEN;
- forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
- if (!forw_packet_aggr->skb) {
- batadv_forw_packet_free(forw_packet_aggr, true);
+ skb = netdev_alloc_skb_ip_align(NULL, skb_size);
+ if (!skb)
+ return;
+
+ forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing,
+ queue_left, bat_priv, skb);
+ if (!forw_packet_aggr) {
+ kfree_skb(skb);
return;
}
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index ba8420d8a992..d07e89ec8467 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -395,7 +395,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
ether_addr_copy(ethhdr->h_source, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): CLAIM %pM on vid %d\n", mac,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_UNCLAIM:
/* unclaim frame
@@ -404,7 +404,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
ether_addr_copy(hw_src, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): UNCLAIM %pM on vid %d\n", mac,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_ANNOUNCE:
/* announcement frame
@@ -413,7 +413,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
ether_addr_copy(hw_src, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): ANNOUNCE of %pM on vid %d\n",
- ethhdr->h_source, BATADV_PRINT_VID(vid));
+ ethhdr->h_source, batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_REQUEST:
/* request frame
@@ -425,14 +425,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): REQUEST of %pM to %pM on vid %d\n",
ethhdr->h_source, ethhdr->h_dest,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_LOOPDETECT:
ether_addr_copy(ethhdr->h_source, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): LOOPDETECT of %pM to %pM on vid %d\n",
ethhdr->h_source, ethhdr->h_dest,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
break;
}
@@ -475,9 +475,9 @@ static void batadv_bla_loopdetect_report(struct work_struct *work)
batadv_info(bat_priv->soft_iface,
"Possible loop on VLAN %d detected which can't be handled by BLA - please check your network setup!\n",
- BATADV_PRINT_VID(backbone_gw->vid));
+ batadv_print_vid(backbone_gw->vid));
snprintf(vid_str, sizeof(vid_str), "%d",
- BATADV_PRINT_VID(backbone_gw->vid));
+ batadv_print_vid(backbone_gw->vid));
vid_str[sizeof(vid_str) - 1] = 0;
batadv_throw_uevent(bat_priv, BATADV_UEV_BLA, BATADV_UEV_LOOPDETECT,
@@ -510,7 +510,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n",
- orig, BATADV_PRINT_VID(vid));
+ orig, batadv_print_vid(vid));
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry)
@@ -719,7 +719,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
- mac, BATADV_PRINT_VID(vid));
+ mac, batadv_print_vid(vid));
kref_get(&claim->refcount);
hash_added = batadv_hash_add(bat_priv->bla.claim_hash,
@@ -739,8 +739,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
goto claim_free_ref;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_add_claim(): changing ownership for %pM, vid %d\n",
- mac, BATADV_PRINT_VID(vid));
+ "bla_add_claim(): changing ownership for %pM, vid %d to gw %pM\n",
+ mac, batadv_print_vid(vid), backbone_gw->orig);
remove_crc = true;
}
@@ -809,7 +809,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
return;
batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n",
- mac, BATADV_PRINT_VID(vid));
+ mac, batadv_print_vid(vid));
batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
batadv_choose_claim, claim);
@@ -849,7 +849,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
- BATADV_PRINT_VID(vid), backbone_gw->orig, crc);
+ batadv_print_vid(vid), backbone_gw->orig, crc);
spin_lock_bh(&backbone_gw->crc_lock);
backbone_crc = backbone_gw->crc;
@@ -859,7 +859,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
"handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n",
backbone_gw->orig,
- BATADV_PRINT_VID(backbone_gw->vid),
+ batadv_print_vid(backbone_gw->vid),
backbone_crc, crc);
batadv_bla_send_request(backbone_gw);
@@ -904,7 +904,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_request(): REQUEST vid %d (sent by %pM)...\n",
- BATADV_PRINT_VID(vid), ethhdr->h_source);
+ batadv_print_vid(vid), ethhdr->h_source);
batadv_bla_answer_request(bat_priv, primary_if, vid);
return true;
@@ -941,7 +941,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv,
/* this must be an UNCLAIM frame */
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n",
- claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig);
+ claim_addr, batadv_print_vid(vid), backbone_gw->orig);
batadv_bla_del_claim(bat_priv, claim_addr, vid);
batadv_backbone_gw_put(backbone_gw);
@@ -1161,7 +1161,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
if (ret == 1)
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
- ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src,
+ ethhdr->h_source, batadv_print_vid(vid), hw_src,
hw_dst);
if (ret < 2)
@@ -1197,7 +1197,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
- ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst);
+ ethhdr->h_source, batadv_print_vid(vid), hw_src, hw_dst);
return true;
}
@@ -1295,10 +1295,13 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
goto skip;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_purge_claims(): %pM, vid %d, time out\n",
- claim->addr, claim->vid);
+ "bla_purge_claims(): timed out.\n");
purge_now:
+ batadv_dbg(BATADV_DBG_BLA, bat_priv,
+ "bla_purge_claims(): %pM, vid %d\n",
+ claim->addr, claim->vid);
+
batadv_handle_unclaim(bat_priv, primary_if,
backbone_gw->orig,
claim->addr, claim->vid);
@@ -1846,6 +1849,13 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
/* possible optimization: race for a claim */
/* No claim exists yet, claim it for us!
*/
+
+ batadv_dbg(BATADV_DBG_BLA, bat_priv,
+ "bla_rx(): Unclaimed MAC %pM found. Claim it. Local: %s\n",
+ ethhdr->h_source,
+ batadv_is_my_client(bat_priv,
+ ethhdr->h_source, vid) ?
+ "yes" : "no");
batadv_handle_claim(bat_priv, primary_if,
primary_if->net_dev->dev_addr,
ethhdr->h_source, vid);
@@ -1963,10 +1973,22 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
/* if yes, the client has roamed and we have
* to unclaim it.
*/
- batadv_handle_unclaim(bat_priv, primary_if,
- primary_if->net_dev->dev_addr,
- ethhdr->h_source, vid);
- goto allow;
+ if (batadv_has_timed_out(claim->lasttime, 100)) {
+ /* only unclaim if the last claim entry is
+ * older than 100 ms to make sure we really
+ * have a roaming client here.
+ */
+ batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Roaming client %pM detected. Unclaim it.\n",
+ ethhdr->h_source);
+ batadv_handle_unclaim(bat_priv, primary_if,
+ primary_if->net_dev->dev_addr,
+ ethhdr->h_source, vid);
+ goto allow;
+ } else {
+ batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Race for claim %pM detected. Drop packet.\n",
+ ethhdr->h_source);
+ goto handled;
+ }
}
/* check if it is a multicast/broadcast frame */
@@ -2042,7 +2064,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
backbone_crc = backbone_gw->crc;
spin_unlock_bh(&backbone_gw->crc_lock);
seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n",
- claim->addr, BATADV_PRINT_VID(claim->vid),
+ claim->addr, batadv_print_vid(claim->vid),
backbone_gw->orig,
(is_own ? 'x' : ' '),
backbone_crc);
@@ -2274,7 +2296,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n",
backbone_gw->orig,
- BATADV_PRINT_VID(backbone_gw->vid), secs,
+ batadv_print_vid(backbone_gw->vid), secs,
msecs, backbone_crc);
}
rcu_read_unlock();
@@ -2449,3 +2471,52 @@ out:
return ret;
}
+
+#ifdef CONFIG_BATMAN_ADV_DAT
+/**
+ * batadv_bla_check_claim - check if address is claimed
+ *
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: mac address of which the claim status is checked
+ * @vid: the VLAN ID
+ *
+ * addr is checked if this address is claimed by the local device itself.
+ *
+ * Return: true if bla is disabled or the mac is claimed by the device,
+ * false if the device addr is already claimed by another gateway
+ */
+bool batadv_bla_check_claim(struct batadv_priv *bat_priv,
+ u8 *addr, unsigned short vid)
+{
+ struct batadv_bla_claim search_claim;
+ struct batadv_bla_claim *claim = NULL;
+ struct batadv_hard_iface *primary_if = NULL;
+ bool ret = true;
+
+ if (!atomic_read(&bat_priv->bridge_loop_avoidance))
+ return ret;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ return ret;
+
+ /* First look if the mac address is claimed */
+ ether_addr_copy(search_claim.addr, addr);
+ search_claim.vid = vid;
+
+ claim = batadv_claim_hash_find(bat_priv, &search_claim);
+
+ /* If there is a claim and we are not owner of the claim,
+ * return false.
+ */
+ if (claim) {
+ if (!batadv_compare_eth(claim->backbone_gw->orig,
+ primary_if->net_dev->dev_addr))
+ ret = false;
+ batadv_claim_put(claim);
+ }
+
+ batadv_hardif_put(primary_if);
+ return ret;
+}
+#endif
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index e157986bd01c..234775748b8e 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -69,6 +69,10 @@ void batadv_bla_status_update(struct net_device *net_dev);
int batadv_bla_init(struct batadv_priv *bat_priv);
void batadv_bla_free(struct batadv_priv *bat_priv);
int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
+#ifdef CONFIG_BATMAN_ADV_DAT
+bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr,
+ unsigned short vid);
+#endif
#define BATADV_BLA_CRC_INIT 0
#else /* ifdef CONFIG_BATMAN_ADV_BLA */
@@ -145,6 +149,13 @@ static inline int batadv_bla_backbone_dump(struct sk_buff *msg,
return -EOPNOTSUPP;
}
+static inline
+bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr,
+ unsigned short vid)
+{
+ return true;
+}
+
#endif /* ifdef CONFIG_BATMAN_ADV_BLA */
#endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 1bfd1dbc2feb..013e970eff39 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -43,6 +43,7 @@
#include <linux/workqueue.h>
#include <net/arp.h>
+#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
@@ -330,7 +331,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
batadv_dbg(BATADV_DBG_DAT, bat_priv,
"Entry updated: %pI4 %pM (vid: %d)\n",
&dat_entry->ip, dat_entry->mac_addr,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
goto out;
}
@@ -356,7 +357,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
}
batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM (vid: %d)\n",
- &dat_entry->ip, dat_entry->mac_addr, BATADV_PRINT_VID(vid));
+ &dat_entry->ip, dat_entry->mac_addr, batadv_print_vid(vid));
out:
if (dat_entry)
@@ -835,7 +836,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n",
&dat_entry->ip, dat_entry->mac_addr,
- BATADV_PRINT_VID(dat_entry->vid),
+ batadv_print_vid(dat_entry->vid),
last_seen_mins, last_seen_secs);
}
rcu_read_unlock();
@@ -1002,6 +1003,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
bool ret = false;
struct batadv_dat_entry *dat_entry = NULL;
struct sk_buff *skb_new;
+ struct net_device *soft_iface = bat_priv->soft_iface;
int hdr_size = 0;
unsigned short vid;
@@ -1040,16 +1042,30 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
goto out;
}
+ /* If BLA is enabled, only send ARP replies if we have claimed
+ * the destination for the ARP request or if no one else of
+ * the backbone gws belonging to our backbone has claimed the
+ * destination.
+ */
+ if (!batadv_bla_check_claim(bat_priv,
+ dat_entry->mac_addr, vid)) {
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "Device %pM claimed by another backbone gw. Don't send ARP reply!",
+ dat_entry->mac_addr);
+ ret = true;
+ goto out;
+ }
+
skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src,
dat_entry->mac_addr,
hw_src, vid);
if (!skb_new)
goto out;
- skb_new->protocol = eth_type_trans(skb_new,
- bat_priv->soft_iface);
- bat_priv->stats.rx_packets++;
- bat_priv->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size;
+ skb_new->protocol = eth_type_trans(skb_new, soft_iface);
+
+ soft_iface->stats.rx_packets++;
+ soft_iface->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size;
netif_rx(skb_new);
batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
@@ -1188,6 +1204,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
{
+ struct batadv_dat_entry *dat_entry = NULL;
u16 type;
__be32 ip_src, ip_dst;
u8 *hw_src, *hw_dst;
@@ -1210,12 +1227,41 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
hw_dst = batadv_arp_hw_dst(skb, hdr_size);
ip_dst = batadv_arp_ip_dst(skb, hdr_size);
+ /* If ip_dst is already in cache and has the right mac address,
+ * drop this frame if this ARP reply is destined for us because it's
+ * most probably an ARP reply generated by another node of the DHT.
+ * We have most probably received already a reply earlier. Delivering
+ * this frame would lead to doubled receive of an ARP reply.
+ */
+ dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_src, vid);
+ if (dat_entry && batadv_compare_eth(hw_src, dat_entry->mac_addr)) {
+ batadv_dbg(BATADV_DBG_DAT, bat_priv, "Doubled ARP reply removed: ARP MSG = [src: %pM-%pI4 dst: %pM-%pI4]; dat_entry: %pM-%pI4\n",
+ hw_src, &ip_src, hw_dst, &ip_dst,
+ dat_entry->mac_addr, &dat_entry->ip);
+ dropped = true;
+ goto out;
+ }
+
/* Update our internal cache with both the IP addresses the node got
* within the ARP reply
*/
batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid);
+ /* If BLA is enabled, only forward ARP replies if we have claimed the
+ * source of the ARP reply or if no one else of the same backbone has
+ * already claimed that client. This prevents that different gateways
+ * to the same backbone all forward the ARP reply leading to multiple
+ * replies in the backbone.
+ */
+ if (!batadv_bla_check_claim(bat_priv, hw_src, vid)) {
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "Device %pM claimed by another backbone gw. Drop ARP reply.\n",
+ hw_src);
+ dropped = true;
+ goto out;
+ }
+
/* if this REPLY is directed to a client of mine, let's deliver the
* packet to the interface
*/
@@ -1228,6 +1274,8 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
out:
if (dropped)
kfree_skb(skb);
+ if (dat_entry)
+ batadv_dat_entry_put(dat_entry);
/* if dropped == false -> deliver to the interface */
return dropped;
}
@@ -1256,7 +1304,7 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
/* If this packet is an ARP_REQUEST and the node already has the
* information that it is going to ask, then the packet can be dropped
*/
- if (forw_packet->num_packets)
+ if (batadv_forw_packet_is_rebroadcast(forw_packet))
goto out;
vid = batadv_dat_get_vid(forw_packet->skb, &hdr_size);
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 7a2b9f4da078..65ce97efa6b5 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -73,9 +73,10 @@ __printf(2, 3);
/* possibly ratelimited debug output */
#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
do { \
- if (atomic_read(&(bat_priv)->log_level) & (type) && \
+ struct batadv_priv *__batpriv = (bat_priv); \
+ if (atomic_read(&__batpriv->log_level) & (type) && \
(!(ratelimited) || net_ratelimit())) \
- batadv_debug_log(bat_priv, fmt, ## arg); \
+ batadv_debug_log(__batpriv, fmt, ## arg); \
} \
while (0)
#else /* !CONFIG_BATMAN_ADV_DEBUG */
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 5000c540614d..fb381fb26a66 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -516,6 +516,9 @@ static void batadv_recv_handler_init(void)
BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_change) != 12);
BUILD_BUG_ON(sizeof(struct batadv_tvlv_roam_adv) != 8);
+ i = FIELD_SIZEOF(struct sk_buff, cb);
+ BUILD_BUG_ON(sizeof(struct batadv_skb_cb) > i);
+
/* broadcast packet */
batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 57a8103dbce7..810f7d026f54 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.0"
+#define BATADV_SOURCE_VERSION "2017.1"
#endif
/* B.A.T.M.A.N. parameters */
@@ -193,6 +193,7 @@ enum batadv_uev_type {
#include <linux/percpu.h>
#include <linux/types.h>
+#include "packet.h"
#include "types.h"
struct net_device;
@@ -200,8 +201,19 @@ struct packet_type;
struct seq_file;
struct sk_buff;
-#define BATADV_PRINT_VID(vid) (((vid) & BATADV_VLAN_HAS_TAG) ? \
- (int)((vid) & VLAN_VID_MASK) : -1)
+/**
+ * batadv_print_vid - return printable version of vid information
+ * @vid: the VLAN identifier
+ *
+ * Return: -1 when no VLAN is used, VLAN id otherwise
+ */
+static inline int batadv_print_vid(unsigned short vid)
+{
+ if (vid & BATADV_VLAN_HAS_TAG)
+ return (int)(vid & VLAN_VID_MASK);
+ else
+ return -1;
+}
extern struct list_head batadv_hardif_list;
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 952ba81a565b..d327670641ac 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -494,9 +494,8 @@ static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
if (!bridged)
goto update;
-#if !IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
- pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n");
-#endif
+ if (!IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING))
+ pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n");
querier4.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP);
querier4.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP);
@@ -671,7 +670,6 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
return 0;
}
-#if IS_ENABLED(CONFIG_IPV6)
/**
* batadv_mcast_is_report_ipv6 - check for MLD reports
* @skb: the ethernet frame destined for the mesh
@@ -736,7 +734,6 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
return 0;
}
-#endif
/**
* batadv_mcast_forw_mode_check - check for optimized forwarding potential
@@ -765,11 +762,12 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
case ETH_P_IP:
return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb,
is_unsnoopable);
-#if IS_ENABLED(CONFIG_IPV6)
case ETH_P_IPV6:
+ if (!IS_ENABLED(CONFIG_IPV6))
+ return -EINVAL;
+
return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb,
is_unsnoopable);
-#endif
default:
return -EINVAL;
}
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 7fd740b6e36d..e1ebe14ee2a6 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -941,15 +941,17 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
struct batadv_unicast_packet *unicast_packet;
struct batadv_unicast_4addr_packet *unicast_4addr_packet;
- u8 *orig_addr;
- struct batadv_orig_node *orig_node = NULL;
+ u8 *orig_addr, *orig_addr_gw;
+ struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL;
int check, hdr_size = sizeof(*unicast_packet);
enum batadv_subtype subtype;
- bool is4addr;
+ struct ethhdr *ethhdr;
int ret = NET_RX_DROP;
+ bool is4addr, is_gw;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
+ ethhdr = eth_hdr(skb);
is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR;
/* the caller function should have already pulled 2 bytes */
@@ -972,6 +974,23 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
/* packet for me */
if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
+ /* If this is a unicast packet from another backgone gw,
+ * drop it.
+ */
+ orig_addr_gw = ethhdr->h_source;
+ orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw);
+ if (orig_node_gw) {
+ is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw,
+ hdr_size);
+ batadv_orig_node_put(orig_node_gw);
+ if (is_gw) {
+ batadv_dbg(BATADV_DBG_BLA, bat_priv,
+ "recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n",
+ orig_addr_gw);
+ return NET_RX_DROP;
+ }
+ }
+
if (is4addr) {
subtype = unicast_4addr_packet->subtype;
batadv_dat_inc_counter(bat_priv, subtype);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 1489ec27daff..403df596a73d 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -482,6 +482,7 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
* @if_outgoing: The (optional) if_outgoing to be grabbed
* @queue_left: The (optional) queue counter to decrease
* @bat_priv: The bat_priv for the mesh of this forw_packet
+ * @skb: The raw packet this forwarding packet shall contain
*
* Allocates a forwarding packet and tries to get a reference to the
* (optional) if_incoming, if_outgoing and queue_left. If queue_left
@@ -493,7 +494,8 @@ struct batadv_forw_packet *
batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
struct batadv_hard_iface *if_outgoing,
atomic_t *queue_left,
- struct batadv_priv *bat_priv)
+ struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
{
struct batadv_forw_packet *forw_packet;
const char *qname;
@@ -525,7 +527,7 @@ batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
INIT_HLIST_NODE(&forw_packet->list);
INIT_HLIST_NODE(&forw_packet->cleanup_list);
- forw_packet->skb = NULL;
+ forw_packet->skb = skb;
forw_packet->queue_left = queue_left;
forw_packet->if_incoming = if_incoming;
forw_packet->if_outgoing = if_outgoing;
@@ -756,22 +758,23 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
if (!primary_if)
goto err;
+ newskb = skb_copy(skb, GFP_ATOMIC);
+ if (!newskb) {
+ batadv_hardif_put(primary_if);
+ goto err;
+ }
+
forw_packet = batadv_forw_packet_alloc(primary_if, NULL,
&bat_priv->bcast_queue_left,
- bat_priv);
+ bat_priv, newskb);
batadv_hardif_put(primary_if);
if (!forw_packet)
- goto err;
-
- newskb = skb_copy(skb, GFP_ATOMIC);
- if (!newskb)
goto err_packet_free;
/* as we have a copy now, it is safe to decrease the TTL */
bcast_packet = (struct batadv_bcast_packet *)newskb->data;
bcast_packet->ttl--;
- forw_packet->skb = newskb;
forw_packet->own = own_packet;
INIT_DELAYED_WORK(&forw_packet->delayed_work,
@@ -781,11 +784,60 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
return NETDEV_TX_OK;
err_packet_free:
- batadv_forw_packet_free(forw_packet, true);
+ kfree_skb(newskb);
err:
return NETDEV_TX_BUSY;
}
+/**
+ * batadv_forw_packet_bcasts_left - check if a retransmission is necessary
+ * @forw_packet: the forwarding packet to check
+ * @hard_iface: the interface to check on
+ *
+ * Checks whether a given packet has any (re)transmissions left on the provided
+ * interface.
+ *
+ * hard_iface may be NULL: In that case the number of transmissions this skb had
+ * so far is compared with the maximum amount of retransmissions independent of
+ * any interface instead.
+ *
+ * Return: True if (re)transmissions are left, false otherwise.
+ */
+static bool
+batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet,
+ struct batadv_hard_iface *hard_iface)
+{
+ unsigned int max;
+
+ if (hard_iface)
+ max = hard_iface->num_bcasts;
+ else
+ max = BATADV_NUM_BCASTS_MAX;
+
+ return BATADV_SKB_CB(forw_packet->skb)->num_bcasts < max;
+}
+
+/**
+ * batadv_forw_packet_bcasts_inc - increment retransmission counter of a packet
+ * @forw_packet: the packet to increase the counter for
+ */
+static void
+batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet)
+{
+ BATADV_SKB_CB(forw_packet->skb)->num_bcasts++;
+}
+
+/**
+ * batadv_forw_packet_is_rebroadcast - check packet for previous transmissions
+ * @forw_packet: the packet to check
+ *
+ * Return: True if this packet was transmitted before, false otherwise.
+ */
+bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet)
+{
+ return BATADV_SKB_CB(forw_packet->skb)->num_bcasts > 0;
+}
+
static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
{
struct batadv_hard_iface *hard_iface;
@@ -826,7 +878,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
if (hard_iface->soft_iface != soft_iface)
continue;
- if (forw_packet->num_packets >= hard_iface->num_bcasts)
+ if (!batadv_forw_packet_bcasts_left(forw_packet, hard_iface))
continue;
if (forw_packet->own) {
@@ -884,10 +936,10 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
}
rcu_read_unlock();
- forw_packet->num_packets++;
+ batadv_forw_packet_bcasts_inc(forw_packet);
/* if we still have some more bcasts to send */
- if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) {
+ if (batadv_forw_packet_bcasts_left(forw_packet, NULL)) {
batadv_forw_packet_bcast_queue(bat_priv, forw_packet,
send_time);
return;
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index f21166d10323..a16b34f473ef 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -34,11 +34,13 @@ struct batadv_forw_packet *
batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
struct batadv_hard_iface *if_outgoing,
atomic_t *queue_left,
- struct batadv_priv *bat_priv);
+ struct batadv_priv *bat_priv,
+ struct sk_buff *skb);
bool batadv_forw_packet_steal(struct batadv_forw_packet *packet, spinlock_t *l);
void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
struct batadv_forw_packet *forw_packet,
unsigned long send_time);
+bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet);
int batadv_send_skb_to_orig(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index d042c99af028..b25789abf7b9 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -64,28 +64,6 @@
#include "sysfs.h"
#include "translation-table.h"
-static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
-static void batadv_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info);
-static u32 batadv_get_msglevel(struct net_device *dev);
-static void batadv_set_msglevel(struct net_device *dev, u32 value);
-static u32 batadv_get_link(struct net_device *dev);
-static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data);
-static void batadv_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats, u64 *data);
-static int batadv_get_sset_count(struct net_device *dev, int stringset);
-
-static const struct ethtool_ops batadv_ethtool_ops = {
- .get_settings = batadv_get_settings,
- .get_drvinfo = batadv_get_drvinfo,
- .get_msglevel = batadv_get_msglevel,
- .set_msglevel = batadv_set_msglevel,
- .get_link = batadv_get_link,
- .get_strings = batadv_get_strings,
- .get_ethtool_stats = batadv_get_ethtool_stats,
- .get_sset_count = batadv_get_sset_count,
-};
-
int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
{
int result;
@@ -140,7 +118,7 @@ static u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
static struct net_device_stats *batadv_interface_stats(struct net_device *dev)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
- struct net_device_stats *stats = &bat_priv->stats;
+ struct net_device_stats *stats = &dev->stats;
stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX);
stats->tx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES);
@@ -230,6 +208,9 @@ static int batadv_interface_tx(struct sk_buff *skb,
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
goto dropped;
+ /* reset control block to avoid left overs from previous users */
+ memset(skb->cb, 0, sizeof(struct batadv_skb_cb));
+
netif_trans_update(soft_iface);
vid = batadv_get_vid(skb, 0);
ethhdr = eth_hdr(skb);
@@ -947,6 +928,98 @@ static const struct net_device_ops batadv_netdev_ops = {
.ndo_del_slave = batadv_softif_slave_del,
};
+static void batadv_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver));
+ strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version));
+ strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
+ strlcpy(info->bus_info, "batman", sizeof(info->bus_info));
+}
+
+/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702
+ * Declare each description string in struct.name[] to get fixed sized buffer
+ * and compile time checking for strings longer than ETH_GSTRING_LEN.
+ */
+static const struct {
+ const char name[ETH_GSTRING_LEN];
+} batadv_counters_strings[] = {
+ { "tx" },
+ { "tx_bytes" },
+ { "tx_dropped" },
+ { "rx" },
+ { "rx_bytes" },
+ { "forward" },
+ { "forward_bytes" },
+ { "mgmt_tx" },
+ { "mgmt_tx_bytes" },
+ { "mgmt_rx" },
+ { "mgmt_rx_bytes" },
+ { "frag_tx" },
+ { "frag_tx_bytes" },
+ { "frag_rx" },
+ { "frag_rx_bytes" },
+ { "frag_fwd" },
+ { "frag_fwd_bytes" },
+ { "tt_request_tx" },
+ { "tt_request_rx" },
+ { "tt_response_tx" },
+ { "tt_response_rx" },
+ { "tt_roam_adv_tx" },
+ { "tt_roam_adv_rx" },
+#ifdef CONFIG_BATMAN_ADV_DAT
+ { "dat_get_tx" },
+ { "dat_get_rx" },
+ { "dat_put_tx" },
+ { "dat_put_rx" },
+ { "dat_cached_reply_tx" },
+#endif
+#ifdef CONFIG_BATMAN_ADV_NC
+ { "nc_code" },
+ { "nc_code_bytes" },
+ { "nc_recode" },
+ { "nc_recode_bytes" },
+ { "nc_buffer" },
+ { "nc_decode" },
+ { "nc_decode_bytes" },
+ { "nc_decode_failed" },
+ { "nc_sniffed" },
+#endif
+};
+
+static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+ if (stringset == ETH_SS_STATS)
+ memcpy(data, batadv_counters_strings,
+ sizeof(batadv_counters_strings));
+}
+
+static void batadv_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < BATADV_CNT_NUM; i++)
+ data[i] = batadv_sum_counter(bat_priv, i);
+}
+
+static int batadv_get_sset_count(struct net_device *dev, int stringset)
+{
+ if (stringset == ETH_SS_STATS)
+ return BATADV_CNT_NUM;
+
+ return -EOPNOTSUPP;
+}
+
+static const struct ethtool_ops batadv_ethtool_ops = {
+ .get_drvinfo = batadv_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = batadv_get_strings,
+ .get_ethtool_stats = batadv_get_ethtool_stats,
+ .get_sset_count = batadv_get_sset_count,
+};
+
/**
* batadv_softif_free - Deconstructor of batadv_soft_interface
* @dev: Device to cleanup and remove
@@ -971,8 +1044,6 @@ static void batadv_softif_free(struct net_device *dev)
*/
static void batadv_softif_init_early(struct net_device *dev)
{
- struct batadv_priv *priv = netdev_priv(dev);
-
ether_setup(dev);
dev->netdev_ops = &batadv_netdev_ops;
@@ -989,8 +1060,6 @@ static void batadv_softif_init_early(struct net_device *dev)
eth_hw_addr_random(dev);
dev->ethtool_ops = &batadv_ethtool_ops;
-
- memset(priv, 0, sizeof(*priv));
}
struct net_device *batadv_softif_create(struct net *net, const char *name)
@@ -1083,118 +1152,3 @@ struct rtnl_link_ops batadv_link_ops __read_mostly = {
.setup = batadv_softif_init_early,
.dellink = batadv_softif_destroy_netlink,
};
-
-/* ethtool */
-static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
- cmd->supported = 0;
- cmd->advertising = 0;
- ethtool_cmd_speed_set(cmd, SPEED_10);
- cmd->duplex = DUPLEX_FULL;
- cmd->port = PORT_TP;
- cmd->phy_address = 0;
- cmd->transceiver = XCVR_INTERNAL;
- cmd->autoneg = AUTONEG_DISABLE;
- cmd->maxtxpkt = 0;
- cmd->maxrxpkt = 0;
-
- return 0;
-}
-
-static void batadv_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info)
-{
- strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver));
- strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version));
- strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
- strlcpy(info->bus_info, "batman", sizeof(info->bus_info));
-}
-
-static u32 batadv_get_msglevel(struct net_device *dev)
-{
- return -EOPNOTSUPP;
-}
-
-static void batadv_set_msglevel(struct net_device *dev, u32 value)
-{
-}
-
-static u32 batadv_get_link(struct net_device *dev)
-{
- return 1;
-}
-
-/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702
- * Declare each description string in struct.name[] to get fixed sized buffer
- * and compile time checking for strings longer than ETH_GSTRING_LEN.
- */
-static const struct {
- const char name[ETH_GSTRING_LEN];
-} batadv_counters_strings[] = {
- { "tx" },
- { "tx_bytes" },
- { "tx_dropped" },
- { "rx" },
- { "rx_bytes" },
- { "forward" },
- { "forward_bytes" },
- { "mgmt_tx" },
- { "mgmt_tx_bytes" },
- { "mgmt_rx" },
- { "mgmt_rx_bytes" },
- { "frag_tx" },
- { "frag_tx_bytes" },
- { "frag_rx" },
- { "frag_rx_bytes" },
- { "frag_fwd" },
- { "frag_fwd_bytes" },
- { "tt_request_tx" },
- { "tt_request_rx" },
- { "tt_response_tx" },
- { "tt_response_rx" },
- { "tt_roam_adv_tx" },
- { "tt_roam_adv_rx" },
-#ifdef CONFIG_BATMAN_ADV_DAT
- { "dat_get_tx" },
- { "dat_get_rx" },
- { "dat_put_tx" },
- { "dat_put_rx" },
- { "dat_cached_reply_tx" },
-#endif
-#ifdef CONFIG_BATMAN_ADV_NC
- { "nc_code" },
- { "nc_code_bytes" },
- { "nc_recode" },
- { "nc_recode_bytes" },
- { "nc_buffer" },
- { "nc_decode" },
- { "nc_decode_bytes" },
- { "nc_decode_failed" },
- { "nc_sniffed" },
-#endif
-};
-
-static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data)
-{
- if (stringset == ETH_SS_STATS)
- memcpy(data, batadv_counters_strings,
- sizeof(batadv_counters_strings));
-}
-
-static void batadv_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats, u64 *data)
-{
- struct batadv_priv *bat_priv = netdev_priv(dev);
- int i;
-
- for (i = 0; i < BATADV_CNT_NUM; i++)
- data[i] = batadv_sum_counter(bat_priv, i);
-}
-
-static int batadv_get_sset_count(struct net_device *dev, int stringset)
-{
- if (stringset == ETH_SS_STATS)
- return BATADV_CNT_NUM;
-
- return -EOPNOTSUPP;
-}
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index c94ebdecdc3d..556f9a865ddf 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -873,8 +873,8 @@ static int batadv_tp_send(void *arg)
/* something went wrong during the preparation/transmission */
if (unlikely(err && err != BATADV_TP_REASON_CANT_SEND)) {
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
- "Meter: batadv_tp_send() cannot send packets (%d)\n",
- err);
+ "Meter: %s() cannot send packets (%d)\n",
+ __func__, err);
/* ensure nobody else tries to stop the thread now */
if (atomic_dec_and_test(&tp_vars->sending))
tp_vars->reason = err;
@@ -979,7 +979,8 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
if (!tp_vars) {
spin_unlock_bh(&bat_priv->tp_list_lock);
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
- "Meter: batadv_tp_start cannot allocate list elements\n");
+ "Meter: %s cannot allocate list elements\n",
+ __func__);
batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR,
dst, bat_priv, session_cookie);
return;
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 6077a87d46f0..e75b4937b497 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -617,7 +617,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting global tt entry %pM (vid: %d): %s\n",
tt_global->common.addr,
- BATADV_PRINT_VID(tt_global->common.vid), message);
+ batadv_print_vid(tt_global->common.vid), message);
batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
batadv_choose_tt, &tt_global->common);
@@ -671,7 +671,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (tt_local->common.flags & BATADV_TT_CLIENT_PENDING) {
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Re-adding pending client %pM (vid: %d)\n",
- addr, BATADV_PRINT_VID(vid));
+ addr, batadv_print_vid(vid));
/* whatever the reason why the PENDING flag was set,
* this is a client which was enqueued to be removed in
* this orig_interval. Since it popped up again, the
@@ -684,7 +684,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (tt_local->common.flags & BATADV_TT_CLIENT_ROAM) {
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Roaming client %pM (vid: %d) came back to its original location\n",
- addr, BATADV_PRINT_VID(vid));
+ addr, batadv_print_vid(vid));
/* the ROAM flag is set because this client roamed away
* and the node got a roaming_advertisement message. Now
* that the client popped up again at its original
@@ -716,7 +716,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (!vlan) {
net_ratelimited_function(batadv_info, soft_iface,
"adding TT local entry %pM to non-existent VLAN %d\n",
- addr, BATADV_PRINT_VID(vid));
+ addr, batadv_print_vid(vid));
kmem_cache_free(batadv_tl_cache, tt_local);
tt_local = NULL;
goto out;
@@ -724,7 +724,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n",
- addr, BATADV_PRINT_VID(vid),
+ addr, batadv_print_vid(vid),
(u8)atomic_read(&bat_priv->tt.vn));
ether_addr_copy(tt_local->common.addr, addr);
@@ -1097,7 +1097,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq,
" * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n",
tt_common_entry->addr,
- BATADV_PRINT_VID(tt_common_entry->vid),
+ batadv_print_vid(tt_common_entry->vid),
((tt_common_entry->flags &
BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
no_purge ? 'P' : '.',
@@ -1296,7 +1296,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Local tt entry (%pM, vid: %d) pending to be removed: %s\n",
tt_local_entry->common.addr,
- BATADV_PRINT_VID(tt_local_entry->common.vid), message);
+ batadv_print_vid(tt_local_entry->common.vid), message);
}
/**
@@ -1727,7 +1727,7 @@ add_orig_entry:
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new global tt entry: %pM (vid: %d, via %pM)\n",
- common->addr, BATADV_PRINT_VID(common->vid),
+ common->addr, batadv_print_vid(common->vid),
orig_node->orig);
ret = true;
@@ -1835,7 +1835,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
if (!vlan) {
seq_printf(seq,
" * Cannot retrieve VLAN %d for originator %pM\n",
- BATADV_PRINT_VID(tt_common_entry->vid),
+ batadv_print_vid(tt_common_entry->vid),
best_entry->orig_node->orig);
goto print_list;
}
@@ -1844,7 +1844,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
seq_printf(seq,
" %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n",
'*', tt_global_entry->common.addr,
- BATADV_PRINT_VID(tt_global_entry->common.vid),
+ batadv_print_vid(tt_global_entry->common.vid),
best_entry->ttvn, best_entry->orig_node->orig,
last_ttvn, vlan->tt.crc,
((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
@@ -1867,7 +1867,7 @@ print_list:
if (!vlan) {
seq_printf(seq,
" + Cannot retrieve VLAN %d for originator %pM\n",
- BATADV_PRINT_VID(tt_common_entry->vid),
+ batadv_print_vid(tt_common_entry->vid),
orig_entry->orig_node->orig);
continue;
}
@@ -1876,7 +1876,7 @@ print_list:
seq_printf(seq,
" %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n",
'+', tt_global_entry->common.addr,
- BATADV_PRINT_VID(tt_global_entry->common.vid),
+ batadv_print_vid(tt_global_entry->common.vid),
orig_entry->ttvn, orig_entry->orig_node->orig,
last_ttvn, vlan->tt.crc,
((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
@@ -2213,7 +2213,7 @@ batadv_tt_global_del_orig_node(struct batadv_priv *bat_priv,
"Deleting %pM from global tt entry %pM (vid: %d): %s\n",
orig_node->orig,
tt_global_entry->common.addr,
- BATADV_PRINT_VID(vid), message);
+ batadv_print_vid(vid), message);
_batadv_tt_global_del_orig_entry(tt_global_entry,
orig_entry);
}
@@ -2253,12 +2253,13 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv,
/* its the last one, mark for roaming. */
tt_global_entry->common.flags |= BATADV_TT_CLIENT_ROAM;
tt_global_entry->roam_at = jiffies;
- } else
+ } else {
/* there is another entry, we can simply delete this
* one and can still use the other one.
*/
batadv_tt_global_del_orig_node(bat_priv, tt_global_entry,
orig_node, message);
+ }
}
/**
@@ -2314,10 +2315,11 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
/* local entry exists, case 2: client roamed to us. */
batadv_tt_global_del_orig_list(tt_global_entry);
batadv_tt_global_free(bat_priv, tt_global_entry, message);
- } else
+ } else {
/* no local entry exists, case 1: check for roaming */
batadv_tt_global_del_roaming(bat_priv, tt_global_entry,
orig_node, message);
+ }
out:
if (tt_global_entry)
@@ -2375,7 +2377,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting global tt entry %pM (vid: %d): %s\n",
tt_global->common.addr,
- BATADV_PRINT_VID(vid), message);
+ batadv_print_vid(vid), message);
hlist_del_rcu(&tt_common_entry->hash_entry);
batadv_tt_global_entry_put(tt_global);
}
@@ -2435,7 +2437,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv)
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting global tt entry %pM (vid: %d): %s\n",
tt_global->common.addr,
- BATADV_PRINT_VID(tt_global->common.vid),
+ batadv_print_vid(tt_global->common.vid),
msg);
hlist_del_rcu(&tt_common->hash_entry);
@@ -3650,7 +3652,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Sending ROAMING_ADV to %pM (client %pM, vid: %d)\n",
- orig_node->orig, client, BATADV_PRINT_VID(vid));
+ orig_node->orig, client, batadv_print_vid(vid));
batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX);
@@ -3773,7 +3775,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting local tt entry (%pM, vid: %d): pending\n",
tt_common->addr,
- BATADV_PRINT_VID(tt_common->vid));
+ batadv_print_vid(tt_common->vid));
batadv_tt_local_size_dec(bat_priv, tt_common->vid);
hlist_del_rcu(&tt_common->hash_entry);
@@ -4017,7 +4019,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Added temporary global client (addr: %pM, vid: %d, orig: %pM)\n",
- addr, BATADV_PRINT_VID(vid), orig_node->orig);
+ addr, batadv_print_vid(vid), orig_node->orig);
ret = true;
out:
return ret;
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 246f21b4973b..ea43a6449247 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1000,7 +1000,6 @@ struct batadv_priv_bat_v {
* struct batadv_priv - per mesh interface data
* @mesh_state: current status of the mesh (inactive/active/deactivating)
* @soft_iface: net device which holds this struct as private data
- * @stats: structure holding the data for the ndo_get_stats() call
* @bat_counters: mesh internal traffic statistic counters (see batadv_counters)
* @aggregated_ogms: bool indicating whether OGM aggregation is enabled
* @bonding: bool indicating whether traffic bonding is enabled
@@ -1055,7 +1054,6 @@ struct batadv_priv_bat_v {
struct batadv_priv {
atomic_t mesh_state;
struct net_device *soft_iface;
- struct net_device_stats stats;
u64 __percpu *bat_counters; /* Per cpu counters */
atomic_t aggregated_ogms;
atomic_t bonding;
@@ -1377,9 +1375,11 @@ struct batadv_nc_packet {
* relevant to batman-adv in the skb->cb buffer in skbs.
* @decoded: Marks a skb as decoded, which is checked when searching for coding
* opportunities in network-coding.c
+ * @num_bcasts: Counter for broadcast packet retransmissions
*/
struct batadv_skb_cb {
bool decoded;
+ unsigned int num_bcasts;
};
/**
@@ -1392,7 +1392,7 @@ struct batadv_skb_cb {
* @skb: bcast packet's skb buffer
* @packet_len: size of aggregated OGM packet inside the skb buffer
* @direct_link_flags: direct link flags for aggregated OGM packets
- * @num_packets: counter for bcast packet retransmission
+ * @num_packets: counter for aggregated OGMv1 packets
* @delayed_work: work queue callback item for packet sending
* @if_incoming: pointer to incoming hard-iface or primary iface if
* locally generated packet
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index d491529332f4..608959989f8e 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -20,6 +20,7 @@
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
+#include <net/pkt_sched.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -38,7 +39,6 @@ struct skb_cb {
struct in6_addr addr;
struct in6_addr gw;
struct l2cap_chan *chan;
- int status;
};
#define lowpan_cb(skb) ((struct skb_cb *)((skb)->cb))
@@ -64,7 +64,7 @@ struct lowpan_peer {
struct l2cap_chan *chan;
/* peer addresses in various formats */
- unsigned char eui64_addr[EUI64_ADDR_LEN];
+ unsigned char lladdr[ETH_ALEN];
struct in6_addr peer_addr;
};
@@ -270,28 +270,20 @@ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev)
}
static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
- struct l2cap_chan *chan)
+ struct lowpan_peer *peer)
{
- const u8 *saddr, *daddr;
+ const u8 *saddr;
struct lowpan_btle_dev *dev;
- struct lowpan_peer *peer;
dev = lowpan_btle_dev(netdev);
- rcu_read_lock();
- peer = __peer_lookup_chan(dev, chan);
- rcu_read_unlock();
- if (!peer)
- return -EINVAL;
-
- saddr = peer->eui64_addr;
- daddr = dev->netdev->dev_addr;
+ saddr = peer->lladdr;
- return lowpan_header_decompress(skb, netdev, daddr, saddr);
+ return lowpan_header_decompress(skb, netdev, netdev->dev_addr, saddr);
}
static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
- struct l2cap_chan *chan)
+ struct lowpan_peer *peer)
{
struct sk_buff *local_skb;
int ret;
@@ -344,8 +336,9 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
local_skb->dev = dev;
- ret = iphc_decompress(local_skb, dev, chan);
+ ret = iphc_decompress(local_skb, dev, peer);
if (ret < 0) {
+ BT_DBG("iphc_decompress failed: %d", ret);
kfree_skb(local_skb);
goto drop;
}
@@ -365,6 +358,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
consume_skb(local_skb);
consume_skb(skb);
} else {
+ BT_DBG("unknown packet type");
goto drop;
}
@@ -390,7 +384,7 @@ static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
if (!dev || !dev->netdev)
return -ENOENT;
- err = recv_pkt(skb, dev->netdev, chan);
+ err = recv_pkt(skb, dev->netdev, peer);
if (err) {
BT_DBG("recv pkt %d", err);
err = -EAGAIN;
@@ -399,37 +393,6 @@ static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
return err;
}
-static u8 get_addr_type_from_eui64(u8 byte)
-{
- /* Is universal(0) or local(1) bit */
- return ((byte & 0x02) ? BDADDR_LE_RANDOM : BDADDR_LE_PUBLIC);
-}
-
-static void copy_to_bdaddr(struct in6_addr *ip6_daddr, bdaddr_t *addr)
-{
- u8 *eui64 = ip6_daddr->s6_addr + 8;
-
- addr->b[0] = eui64[7];
- addr->b[1] = eui64[6];
- addr->b[2] = eui64[5];
- addr->b[3] = eui64[2];
- addr->b[4] = eui64[1];
- addr->b[5] = eui64[0];
-}
-
-static void convert_dest_bdaddr(struct in6_addr *ip6_daddr,
- bdaddr_t *addr, u8 *addr_type)
-{
- copy_to_bdaddr(ip6_daddr, addr);
-
- /* We need to toggle the U/L bit that we got from IPv6 address
- * so that we get the proper address and type of the BD address.
- */
- addr->b[5] ^= 0x02;
-
- *addr_type = get_addr_type_from_eui64(addr->b[5]);
-}
-
static int setup_header(struct sk_buff *skb, struct net_device *netdev,
bdaddr_t *peer_addr, u8 *peer_addr_type)
{
@@ -437,8 +400,7 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev,
struct ipv6hdr *hdr;
struct lowpan_btle_dev *dev;
struct lowpan_peer *peer;
- bdaddr_t addr, *any = BDADDR_ANY;
- u8 *daddr = any->b;
+ u8 *daddr;
int err, status = 0;
hdr = ipv6_hdr(skb);
@@ -449,34 +411,24 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev,
if (ipv6_addr_is_multicast(&ipv6_daddr)) {
lowpan_cb(skb)->chan = NULL;
+ daddr = NULL;
} else {
- u8 addr_type;
+ BT_DBG("dest IP %pI6c", &ipv6_daddr);
- /* Get destination BT device from skb.
- * If there is no such peer then discard the packet.
+ /* The packet might be sent to 6lowpan interface
+ * because of routing (either via default route
+ * or user set route) so get peer according to
+ * the destination address.
*/
- convert_dest_bdaddr(&ipv6_daddr, &addr, &addr_type);
-
- BT_DBG("dest addr %pMR type %d IP %pI6c", &addr,
- addr_type, &ipv6_daddr);
-
- peer = peer_lookup_ba(dev, &addr, addr_type);
+ peer = peer_lookup_dst(dev, &ipv6_daddr, skb);
if (!peer) {
- /* The packet might be sent to 6lowpan interface
- * because of routing (either via default route
- * or user set route) so get peer according to
- * the destination address.
- */
- peer = peer_lookup_dst(dev, &ipv6_daddr, skb);
- if (!peer) {
- BT_DBG("no such peer %pMR found", &addr);
- return -ENOENT;
- }
+ BT_DBG("no such peer");
+ return -ENOENT;
}
- daddr = peer->eui64_addr;
- *peer_addr = addr;
- *peer_addr_type = addr_type;
+ daddr = peer->lladdr;
+ *peer_addr = peer->chan->dst;
+ *peer_addr_type = peer->chan->dst_type;
lowpan_cb(skb)->chan = peer->chan;
status = 1;
@@ -527,15 +479,8 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb,
return 0;
}
- if (!err)
- err = lowpan_cb(skb)->status;
-
- if (err < 0) {
- if (err == -EAGAIN)
- netdev->stats.tx_dropped++;
- else
- netdev->stats.tx_errors++;
- }
+ if (err < 0)
+ netdev->stats.tx_errors++;
return err;
}
@@ -647,9 +592,9 @@ static void netdev_setup(struct net_device *dev)
{
dev->hard_header_len = 0;
dev->needed_tailroom = 0;
- dev->flags = IFF_RUNNING | IFF_POINTOPOINT |
- IFF_MULTICAST;
+ dev->flags = IFF_RUNNING | IFF_MULTICAST;
dev->watchdog_timeo = 0;
+ dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
dev->netdev_ops = &netdev_ops;
dev->header_ops = &header_ops;
@@ -660,34 +605,6 @@ static struct device_type bt_type = {
.name = "bluetooth",
};
-static void set_addr(u8 *eui, u8 *addr, u8 addr_type)
-{
- /* addr is the BT address in little-endian format */
- eui[0] = addr[5];
- eui[1] = addr[4];
- eui[2] = addr[3];
- eui[3] = 0xFF;
- eui[4] = 0xFE;
- eui[5] = addr[2];
- eui[6] = addr[1];
- eui[7] = addr[0];
-
- /* Universal/local bit set, BT 6lowpan draft ch. 3.2.1 */
- if (addr_type == BDADDR_LE_PUBLIC)
- eui[0] &= ~0x02;
- else
- eui[0] |= 0x02;
-
- BT_DBG("type %d addr %*phC", addr_type, 8, eui);
-}
-
-static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr,
- u8 addr_type)
-{
- netdev->addr_assign_type = NET_ADDR_PERM;
- set_addr(netdev->dev_addr, addr->b, addr_type);
-}
-
static void ifup(struct net_device *netdev)
{
int err;
@@ -746,16 +663,9 @@ static struct l2cap_chan *chan_create(void)
return chan;
}
-static void set_ip_addr_bits(u8 addr_type, u8 *addr)
-{
- if (addr_type == BDADDR_LE_PUBLIC)
- *addr |= 0x02;
- else
- *addr &= ~0x02;
-}
-
static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
- struct lowpan_btle_dev *dev)
+ struct lowpan_btle_dev *dev,
+ bool new_netdev)
{
struct lowpan_peer *peer;
@@ -766,19 +676,9 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
peer->chan = chan;
memset(&peer->peer_addr, 0, sizeof(struct in6_addr));
- /* RFC 2464 ch. 5 */
- peer->peer_addr.s6_addr[0] = 0xFE;
- peer->peer_addr.s6_addr[1] = 0x80;
- set_addr((u8 *)&peer->peer_addr.s6_addr + 8, chan->dst.b,
- chan->dst_type);
-
- memcpy(&peer->eui64_addr, (u8 *)&peer->peer_addr.s6_addr + 8,
- EUI64_ADDR_LEN);
+ baswap((void *)peer->lladdr, &chan->dst);
- /* IPv6 address needs to have the U/L bit set properly so toggle
- * it back here.
- */
- set_ip_addr_bits(chan->dst_type, (u8 *)&peer->peer_addr.s6_addr + 8);
+ lowpan_iphc_uncompress_eui48_lladdr(&peer->peer_addr, peer->lladdr);
spin_lock(&devices_lock);
INIT_LIST_HEAD(&peer->list);
@@ -786,7 +686,8 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
spin_unlock(&devices_lock);
/* Notifying peers about us needs to be done without locks held */
- INIT_DELAYED_WORK(&dev->notify_peers, do_notify_peers);
+ if (new_netdev)
+ INIT_DELAYED_WORK(&dev->notify_peers, do_notify_peers);
schedule_delayed_work(&dev->notify_peers, msecs_to_jiffies(100));
return peer->chan;
@@ -803,7 +704,8 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev)
if (!netdev)
return -ENOMEM;
- set_dev_addr(netdev, &chan->src, chan->src_type);
+ netdev->addr_assign_type = NET_ADDR_PERM;
+ baswap((void *)netdev->dev_addr, &chan->src);
netdev->netdev_ops = &netdev_ops;
SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev);
@@ -843,6 +745,7 @@ out:
static inline void chan_ready_cb(struct l2cap_chan *chan)
{
struct lowpan_btle_dev *dev;
+ bool new_netdev = false;
dev = lookup_dev(chan->conn);
@@ -853,12 +756,13 @@ static inline void chan_ready_cb(struct l2cap_chan *chan)
l2cap_chan_del(chan, -ENOENT);
return;
}
+ new_netdev = true;
}
if (!try_module_get(THIS_MODULE))
return;
- add_peer_chan(chan, dev);
+ add_peer_chan(chan, dev, new_netdev);
ifup(dev->netdev);
}
@@ -964,26 +868,28 @@ static struct sk_buff *chan_alloc_skb_cb(struct l2cap_chan *chan,
static void chan_suspend_cb(struct l2cap_chan *chan)
{
- struct sk_buff *skb = chan->data;
+ struct lowpan_btle_dev *dev;
- BT_DBG("chan %p conn %p skb %p", chan, chan->conn, skb);
+ BT_DBG("chan %p suspend", chan);
- if (!skb)
+ dev = lookup_dev(chan->conn);
+ if (!dev || !dev->netdev)
return;
- lowpan_cb(skb)->status = -EAGAIN;
+ netif_stop_queue(dev->netdev);
}
static void chan_resume_cb(struct l2cap_chan *chan)
{
- struct sk_buff *skb = chan->data;
+ struct lowpan_btle_dev *dev;
- BT_DBG("chan %p conn %p skb %p", chan, chan->conn, skb);
+ BT_DBG("chan %p resume", chan);
- if (!skb)
+ dev = lookup_dev(chan->conn);
+ if (!dev || !dev->netdev)
return;
- lowpan_cb(skb)->status = 0;
+ netif_wake_queue(dev->netdev);
}
static long chan_get_sndtimeo_cb(struct l2cap_chan *chan)
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 69e1f7d362a8..42d0997e2fbb 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -159,12 +159,17 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk)
BT_DBG("parent %p, sk %p", parent, sk);
sock_hold(sk);
+ lock_sock(sk);
list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
bt_sk(sk)->parent = parent;
+ release_sock(sk);
parent->sk_ack_backlog++;
}
EXPORT_SYMBOL(bt_accept_enqueue);
+/* Calling function must hold the sk lock.
+ * bt_sk(sk)->parent must be non-NULL meaning sk is in the parent list.
+ */
void bt_accept_unlink(struct sock *sk)
{
BT_DBG("sk %p state %d", sk, sk->sk_state);
@@ -183,11 +188,32 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
BT_DBG("parent %p", parent);
+restart:
list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) {
sk = (struct sock *)s;
+ /* Prevent early freeing of sk due to unlink and sock_kill */
+ sock_hold(sk);
lock_sock(sk);
+ /* Check sk has not already been unlinked via
+ * bt_accept_unlink() due to serialisation caused by sk locking
+ */
+ if (!bt_sk(sk)->parent) {
+ BT_DBG("sk %p, already unlinked", sk);
+ release_sock(sk);
+ sock_put(sk);
+
+ /* Restart the loop as sk is no longer in the list
+ * and also avoid a potential infinite loop because
+ * list_for_each_entry_safe() is not thread safe.
+ */
+ goto restart;
+ }
+
+ /* sk is safely in the parent list so reduce reference count */
+ sock_put(sk);
+
/* FIXME: Is this check still needed */
if (sk->sk_state == BT_CLOSED) {
bt_accept_unlink(sk);
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index 02a4ccc04e1e..ebcab5bbadd7 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -263,7 +263,7 @@ void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle)
struct hci_cp_read_local_amp_assoc cp;
struct amp_assoc *loc_assoc = &hdev->loc_assoc;
struct hci_request req;
- int err = 0;
+ int err;
BT_DBG("%s handle %d", hdev->name, phy_handle);
@@ -282,7 +282,7 @@ void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr)
{
struct hci_cp_read_local_amp_assoc cp;
struct hci_request req;
- int err = 0;
+ int err;
memset(&hdev->loc_assoc, 0, sizeof(struct amp_assoc));
memset(&cp, 0, sizeof(cp));
@@ -292,7 +292,7 @@ void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr)
set_bit(READ_LOC_AMP_ASSOC, &mgr->state);
hci_req_init(&req, hdev);
hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
- hci_req_run_skb(&req, read_local_amp_assoc_complete);
+ err = hci_req_run_skb(&req, read_local_amp_assoc_complete);
if (err < 0)
a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID);
}
@@ -303,7 +303,7 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
struct hci_cp_read_local_amp_assoc cp;
struct amp_mgr *mgr = hcon->amp_mgr;
struct hci_request req;
- int err = 0;
+ int err;
cp.phy_handle = hcon->handle;
cp.len_so_far = cpu_to_le16(0);
@@ -314,7 +314,7 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
/* Read Local AMP Assoc final link information data */
hci_req_init(&req, hdev);
hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
- hci_req_run_skb(&req, read_local_amp_assoc_complete);
+ err = hci_req_run_skb(&req, read_local_amp_assoc_complete);
if (err < 0)
a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID);
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 3ac89e9ace71..05686776a5fb 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2950,8 +2950,8 @@ struct hci_dev *hci_alloc_dev(void)
hdev->le_adv_max_interval = 0x0800;
hdev->le_scan_interval = 0x0060;
hdev->le_scan_window = 0x0030;
- hdev->le_conn_min_interval = 0x0028;
- hdev->le_conn_max_interval = 0x0038;
+ hdev->le_conn_min_interval = 0x0018;
+ hdev->le_conn_max_interval = 0x0028;
hdev->le_conn_latency = 0x0000;
hdev->le_supv_timeout = 0x002a;
hdev->le_def_tx_len = 0x001b;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index fc7f321a3823..f88ac99528ce 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -2425,6 +2425,22 @@ static int l2cap_segment_le_sdu(struct l2cap_chan *chan,
return 0;
}
+static void l2cap_le_flowctl_send(struct l2cap_chan *chan)
+{
+ int sent = 0;
+
+ BT_DBG("chan %p", chan);
+
+ while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) {
+ l2cap_do_send(chan, skb_dequeue(&chan->tx_q));
+ chan->tx_credits--;
+ sent++;
+ }
+
+ BT_DBG("Sent %d credits %u queued %u", sent, chan->tx_credits,
+ skb_queue_len(&chan->tx_q));
+}
+
int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
{
struct sk_buff *skb;
@@ -2458,9 +2474,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
if (len > chan->omtu)
return -EMSGSIZE;
- if (!chan->tx_credits)
- return -EAGAIN;
-
__skb_queue_head_init(&seg_queue);
err = l2cap_segment_le_sdu(chan, &seg_queue, msg, len);
@@ -2475,10 +2488,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
skb_queue_splice_tail_init(&seg_queue, &chan->tx_q);
- while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) {
- l2cap_do_send(chan, skb_dequeue(&chan->tx_q));
- chan->tx_credits--;
- }
+ l2cap_le_flowctl_send(chan);
if (!chan->tx_credits)
chan->ops->suspend(chan);
@@ -5570,10 +5580,8 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
chan->tx_credits += credits;
- while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) {
- l2cap_do_send(chan, skb_dequeue(&chan->tx_q));
- chan->tx_credits--;
- }
+ /* Resume sending */
+ l2cap_le_flowctl_send(chan);
if (chan->tx_credits)
chan->ops->resume(chan);
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index f7eb02f09b54..8ebca9033d60 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -311,7 +311,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
skb_queue_head_init(&d->tx_queue);
mutex_init(&d->lock);
- atomic_set(&d->refcnt, 1);
+ refcount_set(&d->refcnt, 1);
rfcomm_dlc_clear_state(d);
@@ -342,7 +342,7 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d)
{
struct rfcomm_session *s = d->session;
- BT_DBG("dlc %p refcnt %d session %p", d, atomic_read(&d->refcnt), s);
+ BT_DBG("dlc %p refcnt %d session %p", d, refcount_read(&d->refcnt), s);
list_del(&d->list);
d->session = NULL;
diff --git a/net/bpf/Makefile b/net/bpf/Makefile
new file mode 100644
index 000000000000..27b2992a0692
--- /dev/null
+++ b/net/bpf/Makefile
@@ -0,0 +1 @@
+obj-y := test_run.o
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
new file mode 100644
index 000000000000..8a6d0a37c30c
--- /dev/null
+++ b/net/bpf/test_run.c
@@ -0,0 +1,172 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/etherdevice.h>
+#include <linux/filter.h>
+#include <linux/sched/signal.h>
+
+static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx)
+{
+ u32 ret;
+
+ preempt_disable();
+ rcu_read_lock();
+ ret = BPF_PROG_RUN(prog, ctx);
+ rcu_read_unlock();
+ preempt_enable();
+
+ return ret;
+}
+
+static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
+{
+ u64 time_start, time_spent = 0;
+ u32 ret = 0, i;
+
+ if (!repeat)
+ repeat = 1;
+ time_start = ktime_get_ns();
+ for (i = 0; i < repeat; i++) {
+ ret = bpf_test_run_one(prog, ctx);
+ if (need_resched()) {
+ if (signal_pending(current))
+ break;
+ time_spent += ktime_get_ns() - time_start;
+ cond_resched();
+ time_start = ktime_get_ns();
+ }
+ }
+ time_spent += ktime_get_ns() - time_start;
+ do_div(time_spent, repeat);
+ *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+
+ return ret;
+}
+
+static int bpf_test_finish(union bpf_attr __user *uattr, const void *data,
+ u32 size, u32 retval, u32 duration)
+{
+ void __user *data_out = u64_to_user_ptr(uattr->test.data_out);
+ int err = -EFAULT;
+
+ if (data_out && copy_to_user(data_out, data, size))
+ goto out;
+ if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
+ goto out;
+ if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
+ goto out;
+ if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
+ goto out;
+ err = 0;
+out:
+ return err;
+}
+
+static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
+ u32 headroom, u32 tailroom)
+{
+ void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+ void *data;
+
+ if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
+ return ERR_PTR(-EINVAL);
+
+ data = kzalloc(size + headroom + tailroom, GFP_USER);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ if (copy_from_user(data + headroom, data_in, size)) {
+ kfree(data);
+ return ERR_PTR(-EFAULT);
+ }
+ return data;
+}
+
+int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ bool is_l2 = false, is_direct_pkt_access = false;
+ u32 size = kattr->test.data_size_in;
+ u32 repeat = kattr->test.repeat;
+ u32 retval, duration;
+ struct sk_buff *skb;
+ void *data;
+ int ret;
+
+ data = bpf_test_init(kattr, size, NET_SKB_PAD,
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ switch (prog->type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_SCHED_ACT:
+ is_l2 = true;
+ /* fall through */
+ case BPF_PROG_TYPE_LWT_IN:
+ case BPF_PROG_TYPE_LWT_OUT:
+ case BPF_PROG_TYPE_LWT_XMIT:
+ is_direct_pkt_access = true;
+ break;
+ default:
+ break;
+ }
+
+ skb = build_skb(data, 0);
+ if (!skb) {
+ kfree(data);
+ return -ENOMEM;
+ }
+
+ skb_reserve(skb, NET_SKB_PAD);
+ __skb_put(skb, size);
+ skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev);
+ skb_reset_network_header(skb);
+
+ if (is_l2)
+ __skb_push(skb, ETH_HLEN);
+ if (is_direct_pkt_access)
+ bpf_compute_data_end(skb);
+ retval = bpf_test_run(prog, skb, repeat, &duration);
+ if (!is_l2)
+ __skb_push(skb, ETH_HLEN);
+ size = skb->len;
+ /* bpf program can never convert linear skb to non-linear */
+ if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
+ size = skb_headlen(skb);
+ ret = bpf_test_finish(uattr, skb->data, size, retval, duration);
+ kfree_skb(skb);
+ return ret;
+}
+
+int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ u32 size = kattr->test.data_size_in;
+ u32 repeat = kattr->test.repeat;
+ struct xdp_buff xdp = {};
+ u32 retval, duration;
+ void *data;
+ int ret;
+
+ data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM, 0);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ xdp.data_hard_start = data;
+ xdp.data = data + XDP_PACKET_HEADROOM;
+ xdp.data_end = xdp.data + size;
+
+ retval = bpf_test_run(prog, &xdp, repeat, &duration);
+ if (xdp.data != data + XDP_PACKET_HEADROOM)
+ size = xdp.data_end - xdp.data;
+ ret = bpf_test_finish(uattr, xdp.data, size, retval, duration);
+ kfree(data);
+ return ret;
+}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6e08b7199dd7..de7988b0349e 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -594,6 +594,11 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
fdb->updated = now;
if (unlikely(added_by_user))
fdb->added_by_user = 1;
+ /* Take over HW learned entry */
+ if (unlikely(fdb->added_by_external_learn)) {
+ fdb->added_by_external_learn = 0;
+ fdb_modified = true;
+ }
if (unlikely(fdb_modified))
fdb_notify(br, fdb, RTM_NEWNEIGH);
}
@@ -854,6 +859,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
br_fdb_update(br, p, addr, vid, true);
rcu_read_unlock();
local_bh_enable();
+ } else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
+ err = br_fdb_external_learn_add(br, p, addr, vid);
} else {
spin_lock_bh(&br->hash_lock);
err = fdb_add_entry(br, p, addr, ndm->ndm_state,
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a8d0ed282a10..f3544d96155c 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -22,6 +22,7 @@
#include <linux/rtnetlink.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include <net/sock.h>
#include <linux/if_vlan.h>
#include <net/switchdev.h>
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 056e6ac49d8f..b0845480a3ae 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -464,7 +464,8 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_device *dev;
int err;
- err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL,
+ NULL);
if (err < 0)
return err;
@@ -568,7 +569,8 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
return ret;
}
-static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
@@ -662,7 +664,8 @@ unlock:
return err;
}
-static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 1f1e62095464..067cf0313449 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -997,13 +997,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
if (!elem)
return okfn(net, sk, skb);
- /* We may already have this, but read-locks nest anyway */
- rcu_read_lock();
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
ret = nf_hook_slow(skb, &state, elem);
- rcu_read_unlock();
if (ret == 1)
ret = okfn(net, sk, skb);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 225ef7d53701..650986473577 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -748,8 +748,8 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
if (p && protinfo) {
if (protinfo->nla_type & NLA_F_NESTED) {
- err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
- protinfo, br_port_policy);
+ err = nla_parse_nested(tb, IFLA_BRPORT_MAX, protinfo,
+ br_port_policy, NULL);
if (err)
return err;
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index c913491495ab..3712c7f0e00c 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -227,8 +227,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr,
memset(tinfo, 0, sizeof(*tinfo));
- err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX,
- attr, vlan_tunnel_policy);
+ err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, attr,
+ vlan_tunnel_policy, NULL);
if (err < 0)
return err;
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 98b9c8e8615e..707caea39743 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -62,10 +62,10 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset)
pptr = skb_header_pointer(skb, offset,
sizeof(_ports), &_ports);
if (pptr == NULL) {
- printk(" INCOMPLETE TCP/UDP header");
+ pr_cont(" INCOMPLETE TCP/UDP header");
return;
}
- printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst));
+ pr_cont(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst));
}
}
@@ -100,11 +100,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL) {
- printk(" INCOMPLETE IP header");
+ pr_cont(" INCOMPLETE IP header");
goto out;
}
- printk(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d",
- &ih->saddr, &ih->daddr, ih->tos, ih->protocol);
+ pr_cont(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d",
+ &ih->saddr, &ih->daddr, ih->tos, ih->protocol);
print_ports(skb, ih->protocol, ih->ihl*4);
goto out;
}
@@ -120,11 +120,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL) {
- printk(" INCOMPLETE IPv6 header");
+ pr_cont(" INCOMPLETE IPv6 header");
goto out;
}
- printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d",
- &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
+ pr_cont(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d",
+ &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
nexthdr = ih->nexthdr;
offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr, &frag_off);
if (offset_ph == -1)
@@ -142,12 +142,12 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL) {
- printk(" INCOMPLETE ARP header");
+ pr_cont(" INCOMPLETE ARP header");
goto out;
}
- printk(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d",
- ntohs(ah->ar_hrd), ntohs(ah->ar_pro),
- ntohs(ah->ar_op));
+ pr_cont(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d",
+ ntohs(ah->ar_hrd), ntohs(ah->ar_pro),
+ ntohs(ah->ar_op));
/* If it's for Ethernet and the lengths are OK,
* then log the ARP payload
@@ -161,17 +161,17 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ap = skb_header_pointer(skb, sizeof(_arph),
sizeof(_arpp), &_arpp);
if (ap == NULL) {
- printk(" INCOMPLETE ARP payload");
+ pr_cont(" INCOMPLETE ARP payload");
goto out;
}
- printk(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4",
- ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
+ pr_cont(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4",
+ ap->mac_src, ap->ip_src,
+ ap->mac_dst, ap->ip_dst);
}
}
out:
- printk("\n");
+ pr_cont("\n");
spin_unlock_bh(&ebt_log_lock);
-
}
static unsigned int
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 206dc266ecd2..346ef6b00b8f 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -375,11 +375,7 @@ static int nft_reject_bridge_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int icmp_code, err;
-
- err = nft_reject_bridge_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
+ int icmp_code;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 5488e4a6ccd0..b6406fe33c76 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -2,7 +2,7 @@
* af_can.c - Protocol family CAN core module
* (used by different CAN protocol modules)
*
- * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * Copyright (c) 2002-2017 Volkswagen Group Electronic Research
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -75,20 +75,12 @@ static int stats_timer __read_mostly = 1;
module_param(stats_timer, int, S_IRUGO);
MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
-/* receive filters subscribed for 'all' CAN devices */
-struct dev_rcv_lists can_rx_alldev_list;
-static DEFINE_SPINLOCK(can_rcvlists_lock);
-
static struct kmem_cache *rcv_cache __read_mostly;
/* table of registered CAN protocols */
static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly;
static DEFINE_MUTEX(proto_tab_lock);
-struct timer_list can_stattimer; /* timer for statistics update */
-struct s_stats can_stats; /* packet statistics */
-struct s_pstats can_pstats; /* receive list statistics */
-
static atomic_t skbcounter = ATOMIC_INIT(0);
/*
@@ -145,9 +137,6 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
if (protocol < 0 || protocol >= CAN_NPROTO)
return -EINVAL;
- if (!net_eq(net, &init_net))
- return -EAFNOSUPPORT;
-
cp = can_get_proto(protocol);
#ifdef CONFIG_MODULES
@@ -228,6 +217,7 @@ int can_send(struct sk_buff *skb, int loop)
{
struct sk_buff *newskb = NULL;
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
+ struct s_stats *can_stats = dev_net(skb->dev)->can.can_stats;
int err = -EINVAL;
if (skb->len == CAN_MTU) {
@@ -316,8 +306,8 @@ int can_send(struct sk_buff *skb, int loop)
netif_rx_ni(newskb);
/* update statistics */
- can_stats.tx_frames++;
- can_stats.tx_frames_delta++;
+ can_stats->tx_frames++;
+ can_stats->tx_frames_delta++;
return 0;
@@ -331,10 +321,11 @@ EXPORT_SYMBOL(can_send);
* af_can rx path
*/
-static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev)
+static struct dev_rcv_lists *find_dev_rcv_lists(struct net *net,
+ struct net_device *dev)
{
if (!dev)
- return &can_rx_alldev_list;
+ return net->can.can_rx_alldev_list;
else
return (struct dev_rcv_lists *)dev->ml_priv;
}
@@ -467,13 +458,14 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
* -ENOMEM on missing cache mem to create subscription entry
* -ENODEV unknown device
*/
-int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
- void (*func)(struct sk_buff *, void *), void *data,
- char *ident, struct sock *sk)
+int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
+ canid_t mask, void (*func)(struct sk_buff *, void *),
+ void *data, char *ident, struct sock *sk)
{
struct receiver *r;
struct hlist_head *rl;
struct dev_rcv_lists *d;
+ struct s_pstats *can_pstats = net->can.can_pstats;
int err = 0;
/* insert new receiver (dev,canid,mask) -> (func,data) */
@@ -481,13 +473,16 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
if (dev && dev->type != ARPHRD_CAN)
return -ENODEV;
+ if (dev && !net_eq(net, dev_net(dev)))
+ return -ENODEV;
+
r = kmem_cache_alloc(rcv_cache, GFP_KERNEL);
if (!r)
return -ENOMEM;
- spin_lock(&can_rcvlists_lock);
+ spin_lock(&net->can.can_rcvlists_lock);
- d = find_dev_rcv_lists(dev);
+ d = find_dev_rcv_lists(net, dev);
if (d) {
rl = find_rcv_list(&can_id, &mask, d);
@@ -502,15 +497,15 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
hlist_add_head_rcu(&r->list, rl);
d->entries++;
- can_pstats.rcv_entries++;
- if (can_pstats.rcv_entries_max < can_pstats.rcv_entries)
- can_pstats.rcv_entries_max = can_pstats.rcv_entries;
+ can_pstats->rcv_entries++;
+ if (can_pstats->rcv_entries_max < can_pstats->rcv_entries)
+ can_pstats->rcv_entries_max = can_pstats->rcv_entries;
} else {
kmem_cache_free(rcv_cache, r);
err = -ENODEV;
}
- spin_unlock(&can_rcvlists_lock);
+ spin_unlock(&net->can.can_rcvlists_lock);
return err;
}
@@ -540,19 +535,24 @@ static void can_rx_delete_receiver(struct rcu_head *rp)
* Description:
* Removes subscription entry depending on given (subscription) values.
*/
-void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
- void (*func)(struct sk_buff *, void *), void *data)
+void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
+ canid_t mask, void (*func)(struct sk_buff *, void *),
+ void *data)
{
struct receiver *r = NULL;
struct hlist_head *rl;
+ struct s_pstats *can_pstats = net->can.can_pstats;
struct dev_rcv_lists *d;
if (dev && dev->type != ARPHRD_CAN)
return;
- spin_lock(&can_rcvlists_lock);
+ if (dev && !net_eq(net, dev_net(dev)))
+ return;
+
+ spin_lock(&net->can.can_rcvlists_lock);
- d = find_dev_rcv_lists(dev);
+ d = find_dev_rcv_lists(net, dev);
if (!d) {
pr_err("BUG: receive list not found for "
"dev %s, id %03X, mask %03X\n",
@@ -588,8 +588,8 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
hlist_del_rcu(&r->list);
d->entries--;
- if (can_pstats.rcv_entries > 0)
- can_pstats.rcv_entries--;
+ if (can_pstats->rcv_entries > 0)
+ can_pstats->rcv_entries--;
/* remove device structure requested by NETDEV_UNREGISTER */
if (d->remove_on_zero_entries && !d->entries) {
@@ -598,7 +598,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
}
out:
- spin_unlock(&can_rcvlists_lock);
+ spin_unlock(&net->can.can_rcvlists_lock);
/* schedule the receiver item for deletion */
if (r) {
@@ -683,11 +683,13 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
static void can_receive(struct sk_buff *skb, struct net_device *dev)
{
struct dev_rcv_lists *d;
+ struct net *net = dev_net(dev);
+ struct s_stats *can_stats = net->can.can_stats;
int matches;
/* update statistics */
- can_stats.rx_frames++;
- can_stats.rx_frames_delta++;
+ can_stats->rx_frames++;
+ can_stats->rx_frames_delta++;
/* create non-zero unique skb identifier together with *skb */
while (!(can_skb_prv(skb)->skbcnt))
@@ -696,10 +698,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
rcu_read_lock();
/* deliver the packet to sockets listening on all devices */
- matches = can_rcv_filter(&can_rx_alldev_list, skb);
+ matches = can_rcv_filter(net->can.can_rx_alldev_list, skb);
/* find receive list for this device */
- d = find_dev_rcv_lists(dev);
+ d = find_dev_rcv_lists(net, dev);
if (d)
matches += can_rcv_filter(d, skb);
@@ -709,8 +711,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
consume_skb(skb);
if (matches > 0) {
- can_stats.matches++;
- can_stats.matches_delta++;
+ can_stats->matches++;
+ can_stats->matches_delta++;
}
}
@@ -719,9 +721,6 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (unlikely(!net_eq(dev_net(dev), &init_net)))
- goto drop;
-
if (WARN_ONCE(dev->type != ARPHRD_CAN ||
skb->len != CAN_MTU ||
cfd->len > CAN_MAX_DLEN,
@@ -743,9 +742,6 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (unlikely(!net_eq(dev_net(dev), &init_net)))
- goto drop;
-
if (WARN_ONCE(dev->type != ARPHRD_CAN ||
skb->len != CANFD_MTU ||
cfd->len > CANFD_MAX_DLEN,
@@ -835,9 +831,6 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct dev_rcv_lists *d;
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
if (dev->type != ARPHRD_CAN)
return NOTIFY_DONE;
@@ -855,7 +848,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
break;
case NETDEV_UNREGISTER:
- spin_lock(&can_rcvlists_lock);
+ spin_lock(&dev_net(dev)->can.can_rcvlists_lock);
d = dev->ml_priv;
if (d) {
@@ -869,7 +862,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
pr_err("can: notifier: receive list not found for dev "
"%s\n", dev->name);
- spin_unlock(&can_rcvlists_lock);
+ spin_unlock(&dev_net(dev)->can.can_rcvlists_lock);
break;
}
@@ -877,6 +870,59 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
return NOTIFY_DONE;
}
+static int can_pernet_init(struct net *net)
+{
+ net->can.can_rcvlists_lock =
+ __SPIN_LOCK_UNLOCKED(net->can.can_rcvlists_lock);
+ net->can.can_rx_alldev_list =
+ kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
+
+ net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL);
+ net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL);
+
+ if (IS_ENABLED(CONFIG_PROC_FS)) {
+ /* the statistics are updated every second (timer triggered) */
+ if (stats_timer) {
+ setup_timer(&net->can.can_stattimer, can_stat_update,
+ (unsigned long)net);
+ mod_timer(&net->can.can_stattimer,
+ round_jiffies(jiffies + HZ));
+ }
+ net->can.can_stats->jiffies_init = jiffies;
+ can_init_proc(net);
+ }
+
+ return 0;
+}
+
+static void can_pernet_exit(struct net *net)
+{
+ struct net_device *dev;
+
+ if (IS_ENABLED(CONFIG_PROC_FS)) {
+ can_remove_proc(net);
+ if (stats_timer)
+ del_timer_sync(&net->can.can_stattimer);
+ }
+
+ /* remove created dev_rcv_lists from still registered CAN devices */
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
+ if (dev->type == ARPHRD_CAN && dev->ml_priv) {
+ struct dev_rcv_lists *d = dev->ml_priv;
+
+ BUG_ON(d->entries);
+ kfree(d);
+ dev->ml_priv = NULL;
+ }
+ }
+ rcu_read_unlock();
+
+ kfree(net->can.can_rx_alldev_list);
+ kfree(net->can.can_stats);
+ kfree(net->can.can_pstats);
+}
+
/*
* af_can module init/exit functions
*/
@@ -902,6 +948,11 @@ static struct notifier_block can_netdev_notifier __read_mostly = {
.notifier_call = can_notifier,
};
+static struct pernet_operations can_pernet_ops __read_mostly = {
+ .init = can_pernet_init,
+ .exit = can_pernet_exit,
+};
+
static __init int can_init(void)
{
/* check for correct padding to be able to use the structs similarly */
@@ -912,21 +963,12 @@ static __init int can_init(void)
pr_info("can: controller area network core (" CAN_VERSION_STRING ")\n");
- memset(&can_rx_alldev_list, 0, sizeof(can_rx_alldev_list));
-
rcv_cache = kmem_cache_create("can_receiver", sizeof(struct receiver),
0, 0, NULL);
if (!rcv_cache)
return -ENOMEM;
- if (IS_ENABLED(CONFIG_PROC_FS)) {
- if (stats_timer) {
- /* the statistics are updated every second (timer triggered) */
- setup_timer(&can_stattimer, can_stat_update, 0);
- mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
- }
- can_init_proc();
- }
+ register_pernet_subsys(&can_pernet_ops);
/* protocol register */
sock_register(&can_family_ops);
@@ -939,34 +981,13 @@ static __init int can_init(void)
static __exit void can_exit(void)
{
- struct net_device *dev;
-
- if (IS_ENABLED(CONFIG_PROC_FS)) {
- if (stats_timer)
- del_timer_sync(&can_stattimer);
-
- can_remove_proc();
- }
-
/* protocol unregister */
dev_remove_pack(&canfd_packet);
dev_remove_pack(&can_packet);
unregister_netdevice_notifier(&can_netdev_notifier);
sock_unregister(PF_CAN);
- /* remove created dev_rcv_lists from still registered CAN devices */
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv) {
-
- struct dev_rcv_lists *d = dev->ml_priv;
-
- BUG_ON(d->entries);
- kfree(d);
- dev->ml_priv = NULL;
- }
- }
- rcu_read_unlock();
+ unregister_pernet_subsys(&can_pernet_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
diff --git a/net/can/af_can.h b/net/can/af_can.h
index b86f5129e838..d0ef45bb2a72 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -110,18 +110,9 @@ struct s_pstats {
unsigned long rcv_entries_max;
};
-/* receive filters subscribed for 'all' CAN devices */
-extern struct dev_rcv_lists can_rx_alldev_list;
-
/* function prototypes for the CAN networklayer procfs (proc.c) */
-void can_init_proc(void);
-void can_remove_proc(void);
+void can_init_proc(struct net *net);
+void can_remove_proc(struct net *net);
void can_stat_update(unsigned long data);
-/* structures and variables from af_can.c needed in proc.c for reading */
-extern struct timer_list can_stattimer; /* timer for statistics update */
-extern struct s_stats can_stats; /* packet statistics */
-extern struct s_pstats can_pstats; /* receive list statistics */
-extern struct hlist_head can_rx_dev_list; /* rx dispatcher structures */
-
#endif /* AF_CAN_H */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 95d13b233c65..0e855917b7e1 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1,7 +1,7 @@
/*
* bcm.c - Broadcast Manager to filter/send (cyclic) CAN content
*
- * Copyright (c) 2002-2016 Volkswagen Group Electronic Research
+ * Copyright (c) 2002-2017 Volkswagen Group Electronic Research
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -77,7 +77,7 @@
(CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \
(CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG))
-#define CAN_BCM_VERSION "20161123"
+#define CAN_BCM_VERSION "20170425"
MODULE_DESCRIPTION("PF_CAN broadcast manager protocol");
MODULE_LICENSE("Dual BSD/GPL");
@@ -118,8 +118,6 @@ struct bcm_op {
struct net_device *rx_reg_dev;
};
-static struct proc_dir_entry *proc_dir;
-
struct bcm_sock {
struct sock sk;
int bound;
@@ -149,7 +147,7 @@ static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv)
/*
* procfs functions
*/
-static char *bcm_proc_getifname(char *result, int ifindex)
+static char *bcm_proc_getifname(struct net *net, char *result, int ifindex)
{
struct net_device *dev;
@@ -157,7 +155,7 @@ static char *bcm_proc_getifname(char *result, int ifindex)
return "any";
rcu_read_lock();
- dev = dev_get_by_index_rcu(&init_net, ifindex);
+ dev = dev_get_by_index_rcu(net, ifindex);
if (dev)
strcpy(result, dev->name);
else
@@ -170,7 +168,8 @@ static char *bcm_proc_getifname(char *result, int ifindex)
static int bcm_proc_show(struct seq_file *m, void *v)
{
char ifname[IFNAMSIZ];
- struct sock *sk = (struct sock *)m->private;
+ struct net *net = m->private;
+ struct sock *sk = (struct sock *)PDE_DATA(m->file->f_inode);
struct bcm_sock *bo = bcm_sk(sk);
struct bcm_op *op;
@@ -178,7 +177,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
seq_printf(m, " / sk %pK", sk);
seq_printf(m, " / bo %pK", bo);
seq_printf(m, " / dropped %lu", bo->dropped_usr_msgs);
- seq_printf(m, " / bound %s", bcm_proc_getifname(ifname, bo->ifindex));
+ seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex));
seq_printf(m, " <<<\n");
list_for_each_entry(op, &bo->rx_ops, list) {
@@ -190,7 +189,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
continue;
seq_printf(m, "rx_op: %03X %-5s ", op->can_id,
- bcm_proc_getifname(ifname, op->ifindex));
+ bcm_proc_getifname(net, ifname, op->ifindex));
if (op->flags & CAN_FD_FRAME)
seq_printf(m, "(%u)", op->nframes);
@@ -219,7 +218,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
list_for_each_entry(op, &bo->tx_ops, list) {
seq_printf(m, "tx_op: %03X %s ", op->can_id,
- bcm_proc_getifname(ifname, op->ifindex));
+ bcm_proc_getifname(net, ifname, op->ifindex));
if (op->flags & CAN_FD_FRAME)
seq_printf(m, "(%u) ", op->nframes);
@@ -242,7 +241,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
static int bcm_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, bcm_proc_show, PDE_DATA(inode));
+ return single_open_net(inode, file, bcm_proc_show);
}
static const struct file_operations bcm_proc_fops = {
@@ -267,7 +266,7 @@ static void bcm_can_tx(struct bcm_op *op)
if (!op->ifindex)
return;
- dev = dev_get_by_index(&init_net, op->ifindex);
+ dev = dev_get_by_index(sock_net(op->sk), op->ifindex);
if (!dev) {
/* RFC: should this bcm_op remove itself here? */
return;
@@ -764,8 +763,8 @@ static void bcm_remove_op(struct bcm_op *op)
static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
{
if (op->rx_reg_dev == dev) {
- can_rx_unregister(dev, op->can_id, REGMASK(op->can_id),
- bcm_rx_handler, op);
+ can_rx_unregister(dev_net(dev), dev, op->can_id,
+ REGMASK(op->can_id), bcm_rx_handler, op);
/* mark as removed subscription */
op->rx_reg_dev = NULL;
@@ -800,7 +799,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
if (op->rx_reg_dev) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net,
+ dev = dev_get_by_index(sock_net(op->sk),
op->ifindex);
if (dev) {
bcm_rx_unreg(dev, op);
@@ -808,7 +807,8 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
}
}
} else
- can_rx_unregister(NULL, op->can_id,
+ can_rx_unregister(sock_net(op->sk), NULL,
+ op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op);
@@ -1220,9 +1220,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(sock_net(sk), ifindex);
if (dev) {
- err = can_rx_register(dev, op->can_id,
+ err = can_rx_register(sock_net(sk), dev,
+ op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op,
"bcm", sk);
@@ -1232,7 +1233,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
}
} else
- err = can_rx_register(NULL, op->can_id,
+ err = can_rx_register(sock_net(sk), NULL, op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op, "bcm", sk);
if (err) {
@@ -1272,7 +1273,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk,
return err;
}
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(sock_net(sk), ifindex);
if (!dev) {
kfree_skb(skb);
return -ENODEV;
@@ -1337,7 +1338,7 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(sock_net(sk), ifindex);
if (!dev)
return -ENODEV;
@@ -1418,7 +1419,7 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
struct bcm_op *op;
int notify_enodev = 0;
- if (!net_eq(dev_net(dev), &init_net))
+ if (!net_eq(dev_net(dev), sock_net(sk)))
return NOTIFY_DONE;
if (dev->type != ARPHRD_CAN)
@@ -1490,6 +1491,7 @@ static int bcm_init(struct sock *sk)
static int bcm_release(struct socket *sock)
{
struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
struct bcm_sock *bo;
struct bcm_op *op, *next;
@@ -1521,14 +1523,14 @@ static int bcm_release(struct socket *sock)
if (op->rx_reg_dev) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, op->ifindex);
+ dev = dev_get_by_index(net, op->ifindex);
if (dev) {
bcm_rx_unreg(dev, op);
dev_put(dev);
}
}
} else
- can_rx_unregister(NULL, op->can_id,
+ can_rx_unregister(net, NULL, op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op);
@@ -1536,8 +1538,8 @@ static int bcm_release(struct socket *sock)
}
/* remove procfs entry */
- if (proc_dir && bo->bcm_proc_read)
- remove_proc_entry(bo->procname, proc_dir);
+ if (net->can.bcmproc_dir && bo->bcm_proc_read)
+ remove_proc_entry(bo->procname, net->can.bcmproc_dir);
/* remove device reference */
if (bo->bound) {
@@ -1560,6 +1562,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
struct bcm_sock *bo = bcm_sk(sk);
+ struct net *net = sock_net(sk);
int ret = 0;
if (len < sizeof(*addr))
@@ -1576,7 +1579,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
if (addr->can_ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, addr->can_ifindex);
+ dev = dev_get_by_index(net, addr->can_ifindex);
if (!dev) {
ret = -ENODEV;
goto fail;
@@ -1595,11 +1598,11 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
bo->ifindex = 0;
}
- if (proc_dir) {
+ if (net->can.bcmproc_dir) {
/* unique socket address as filename */
sprintf(bo->procname, "%lu", sock_i_ino(sk));
bo->bcm_proc_read = proc_create_data(bo->procname, 0644,
- proc_dir,
+ net->can.bcmproc_dir,
&bcm_proc_fops, sk);
if (!bo->bcm_proc_read) {
ret = -ENOMEM;
@@ -1686,6 +1689,31 @@ static const struct can_proto bcm_can_proto = {
.prot = &bcm_proto,
};
+static int canbcm_pernet_init(struct net *net)
+{
+ /* create /proc/net/can-bcm directory */
+ if (IS_ENABLED(CONFIG_PROC_FS)) {
+ net->can.bcmproc_dir =
+ proc_net_mkdir(net, "can-bcm", net->proc_net);
+ }
+
+ return 0;
+}
+
+static void canbcm_pernet_exit(struct net *net)
+{
+ /* remove /proc/net/can-bcm directory */
+ if (IS_ENABLED(CONFIG_PROC_FS)) {
+ if (net->can.bcmproc_dir)
+ remove_proc_entry("can-bcm", net->proc_net);
+ }
+}
+
+static struct pernet_operations canbcm_pernet_ops __read_mostly = {
+ .init = canbcm_pernet_init,
+ .exit = canbcm_pernet_exit,
+};
+
static int __init bcm_module_init(void)
{
int err;
@@ -1698,17 +1726,14 @@ static int __init bcm_module_init(void)
return err;
}
- /* create /proc/net/can-bcm directory */
- proc_dir = proc_mkdir("can-bcm", init_net.proc_net);
+ register_pernet_subsys(&canbcm_pernet_ops);
return 0;
}
static void __exit bcm_module_exit(void)
{
can_proto_unregister(&bcm_can_proto);
-
- if (proc_dir)
- remove_proc_entry("can-bcm", init_net.proc_net);
+ unregister_pernet_subsys(&canbcm_pernet_ops);
}
module_init(bcm_module_init);
diff --git a/net/can/gw.c b/net/can/gw.c
index 7056a1a2bb70..29748d844c3f 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1,7 +1,7 @@
/*
* gw.c - CAN frame Gateway/Router/Bridge with netlink interface
*
- * Copyright (c) 2011 Volkswagen Group Electronic Research
+ * Copyright (c) 2017 Volkswagen Group Electronic Research
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -59,7 +59,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
-#define CAN_GW_VERSION "20130117"
+#define CAN_GW_VERSION "20170425"
#define CAN_GW_NAME "can-gw"
MODULE_DESCRIPTION("PF_CAN netlink gateway");
@@ -79,9 +79,7 @@ MODULE_PARM_DESC(max_hops,
__stringify(CGW_MAX_HOPS) " hops, "
"default: " __stringify(CGW_DEFAULT_HOPS) ")");
-static HLIST_HEAD(cgw_list);
static struct notifier_block notifier;
-
static struct kmem_cache *cgw_cache __read_mostly;
/* structure that contains the (on-the-fly) CAN frame modifications */
@@ -438,16 +436,16 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
gwj->handled_frames++;
}
-static inline int cgw_register_filter(struct cgw_job *gwj)
+static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj)
{
- return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id,
+ return can_rx_register(net, gwj->src.dev, gwj->ccgw.filter.can_id,
gwj->ccgw.filter.can_mask, can_can_gw_rcv,
gwj, "gw", NULL);
}
-static inline void cgw_unregister_filter(struct cgw_job *gwj)
+static inline void cgw_unregister_filter(struct net *net, struct cgw_job *gwj)
{
- can_rx_unregister(gwj->src.dev, gwj->ccgw.filter.can_id,
+ can_rx_unregister(net, gwj->src.dev, gwj->ccgw.filter.can_id,
gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj);
}
@@ -455,9 +453,8 @@ static int cgw_notifier(struct notifier_block *nb,
unsigned long msg, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
if (dev->type != ARPHRD_CAN)
return NOTIFY_DONE;
@@ -468,11 +465,11 @@ static int cgw_notifier(struct notifier_block *nb,
ASSERT_RTNL();
- hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
+ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
if (gwj->src.dev == dev || gwj->dst.dev == dev) {
hlist_del(&gwj->list);
- cgw_unregister_filter(gwj);
+ cgw_unregister_filter(net, gwj);
kmem_cache_free(cgw_cache, gwj);
}
}
@@ -592,12 +589,13 @@ cancel:
/* Dump information about all CAN gateway jobs, in response to RTM_GETROUTE */
static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
struct cgw_job *gwj = NULL;
int idx = 0;
int s_idx = cb->args[0];
rcu_read_lock();
- hlist_for_each_entry_rcu(gwj, &cgw_list, list) {
+ hlist_for_each_entry_rcu(gwj, &net->can.cgw_list, list) {
if (idx < s_idx)
goto cont;
@@ -641,7 +639,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
memset(mod, 0, sizeof(*mod));
err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX,
- cgw_policy);
+ cgw_policy, NULL);
if (err < 0)
return err;
@@ -809,8 +807,10 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
return 0;
}
-static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
+ struct net *net = sock_net(skb->sk);
struct rtcanmsg *r;
struct cgw_job *gwj;
struct cf_mod mod;
@@ -841,7 +841,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
/* check for updating an existing job with identical uid */
- hlist_for_each_entry(gwj, &cgw_list, list) {
+ hlist_for_each_entry(gwj, &net->can.cgw_list, list) {
if (gwj->mod.uid != mod.uid)
continue;
@@ -879,7 +879,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
err = -ENODEV;
- gwj->src.dev = __dev_get_by_index(&init_net, gwj->ccgw.src_idx);
+ gwj->src.dev = __dev_get_by_index(net, gwj->ccgw.src_idx);
if (!gwj->src.dev)
goto out;
@@ -887,7 +887,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
if (gwj->src.dev->type != ARPHRD_CAN)
goto out;
- gwj->dst.dev = __dev_get_by_index(&init_net, gwj->ccgw.dst_idx);
+ gwj->dst.dev = __dev_get_by_index(net, gwj->ccgw.dst_idx);
if (!gwj->dst.dev)
goto out;
@@ -897,9 +897,9 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
- err = cgw_register_filter(gwj);
+ err = cgw_register_filter(net, gwj);
if (!err)
- hlist_add_head_rcu(&gwj->list, &cgw_list);
+ hlist_add_head_rcu(&gwj->list, &net->can.cgw_list);
out:
if (err)
kmem_cache_free(cgw_cache, gwj);
@@ -907,22 +907,24 @@ out:
return err;
}
-static void cgw_remove_all_jobs(void)
+static void cgw_remove_all_jobs(struct net *net)
{
struct cgw_job *gwj = NULL;
struct hlist_node *nx;
ASSERT_RTNL();
- hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
+ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
hlist_del(&gwj->list);
- cgw_unregister_filter(gwj);
+ cgw_unregister_filter(net, gwj);
kmem_cache_free(cgw_cache, gwj);
}
}
-static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
+ struct net *net = sock_net(skb->sk);
struct cgw_job *gwj = NULL;
struct hlist_node *nx;
struct rtcanmsg *r;
@@ -951,7 +953,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
/* two interface indices both set to 0 => remove all entries */
if (!ccgw.src_idx && !ccgw.dst_idx) {
- cgw_remove_all_jobs();
+ cgw_remove_all_jobs(net);
return 0;
}
@@ -960,7 +962,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
/* remove only the first matching entry */
- hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
+ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
if (gwj->flags != r->flags)
continue;
@@ -983,7 +985,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
continue;
hlist_del(&gwj->list);
- cgw_unregister_filter(gwj);
+ cgw_unregister_filter(net, gwj);
kmem_cache_free(cgw_cache, gwj);
err = 0;
break;
@@ -992,6 +994,24 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
}
+static int __net_init cangw_pernet_init(struct net *net)
+{
+ INIT_HLIST_HEAD(&net->can.cgw_list);
+ return 0;
+}
+
+static void __net_exit cangw_pernet_exit(struct net *net)
+{
+ rtnl_lock();
+ cgw_remove_all_jobs(net);
+ rtnl_unlock();
+}
+
+static struct pernet_operations cangw_pernet_ops = {
+ .init = cangw_pernet_init,
+ .exit = cangw_pernet_exit,
+};
+
static __init int cgw_module_init(void)
{
/* sanitize given module parameter */
@@ -1000,6 +1020,7 @@ static __init int cgw_module_init(void)
pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n",
max_hops);
+ register_pernet_subsys(&cangw_pernet_ops);
cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job),
0, 0, NULL);
@@ -1029,10 +1050,7 @@ static __exit void cgw_module_exit(void)
unregister_netdevice_notifier(&notifier);
- rtnl_lock();
- cgw_remove_all_jobs();
- rtnl_unlock();
-
+ unregister_pernet_subsys(&cangw_pernet_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
kmem_cache_destroy(cgw_cache);
diff --git a/net/can/proc.c b/net/can/proc.c
index 85ef7bb0f176..83045f00c63c 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -62,17 +62,6 @@
#define CAN_PROC_RCVLIST_EFF "rcvlist_eff"
#define CAN_PROC_RCVLIST_ERR "rcvlist_err"
-static struct proc_dir_entry *can_dir;
-static struct proc_dir_entry *pde_version;
-static struct proc_dir_entry *pde_stats;
-static struct proc_dir_entry *pde_reset_stats;
-static struct proc_dir_entry *pde_rcvlist_all;
-static struct proc_dir_entry *pde_rcvlist_fil;
-static struct proc_dir_entry *pde_rcvlist_inv;
-static struct proc_dir_entry *pde_rcvlist_sff;
-static struct proc_dir_entry *pde_rcvlist_eff;
-static struct proc_dir_entry *pde_rcvlist_err;
-
static int user_reset;
static const char rx_list_name[][8] = {
@@ -86,21 +75,23 @@ static const char rx_list_name[][8] = {
* af_can statistics stuff
*/
-static void can_init_stats(void)
+static void can_init_stats(struct net *net)
{
+ struct s_stats *can_stats = net->can.can_stats;
+ struct s_pstats *can_pstats = net->can.can_pstats;
/*
* This memset function is called from a timer context (when
* can_stattimer is active which is the default) OR in a process
* context (reading the proc_fs when can_stattimer is disabled).
*/
- memset(&can_stats, 0, sizeof(can_stats));
- can_stats.jiffies_init = jiffies;
+ memset(can_stats, 0, sizeof(struct s_stats));
+ can_stats->jiffies_init = jiffies;
- can_pstats.stats_reset++;
+ can_pstats->stats_reset++;
if (user_reset) {
user_reset = 0;
- can_pstats.user_reset++;
+ can_pstats->user_reset++;
}
}
@@ -126,64 +117,66 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
void can_stat_update(unsigned long data)
{
+ struct net *net = (struct net *)data;
+ struct s_stats *can_stats = net->can.can_stats;
unsigned long j = jiffies; /* snapshot */
/* restart counting in timer context on user request */
if (user_reset)
- can_init_stats();
+ can_init_stats(net);
/* restart counting on jiffies overflow */
- if (j < can_stats.jiffies_init)
- can_init_stats();
+ if (j < can_stats->jiffies_init)
+ can_init_stats(net);
/* prevent overflow in calc_rate() */
- if (can_stats.rx_frames > (ULONG_MAX / HZ))
- can_init_stats();
+ if (can_stats->rx_frames > (ULONG_MAX / HZ))
+ can_init_stats(net);
/* prevent overflow in calc_rate() */
- if (can_stats.tx_frames > (ULONG_MAX / HZ))
- can_init_stats();
+ if (can_stats->tx_frames > (ULONG_MAX / HZ))
+ can_init_stats(net);
/* matches overflow - very improbable */
- if (can_stats.matches > (ULONG_MAX / 100))
- can_init_stats();
+ if (can_stats->matches > (ULONG_MAX / 100))
+ can_init_stats(net);
/* calc total values */
- if (can_stats.rx_frames)
- can_stats.total_rx_match_ratio = (can_stats.matches * 100) /
- can_stats.rx_frames;
+ if (can_stats->rx_frames)
+ can_stats->total_rx_match_ratio = (can_stats->matches * 100) /
+ can_stats->rx_frames;
- can_stats.total_tx_rate = calc_rate(can_stats.jiffies_init, j,
- can_stats.tx_frames);
- can_stats.total_rx_rate = calc_rate(can_stats.jiffies_init, j,
- can_stats.rx_frames);
+ can_stats->total_tx_rate = calc_rate(can_stats->jiffies_init, j,
+ can_stats->tx_frames);
+ can_stats->total_rx_rate = calc_rate(can_stats->jiffies_init, j,
+ can_stats->rx_frames);
/* calc current values */
- if (can_stats.rx_frames_delta)
- can_stats.current_rx_match_ratio =
- (can_stats.matches_delta * 100) /
- can_stats.rx_frames_delta;
+ if (can_stats->rx_frames_delta)
+ can_stats->current_rx_match_ratio =
+ (can_stats->matches_delta * 100) /
+ can_stats->rx_frames_delta;
- can_stats.current_tx_rate = calc_rate(0, HZ, can_stats.tx_frames_delta);
- can_stats.current_rx_rate = calc_rate(0, HZ, can_stats.rx_frames_delta);
+ can_stats->current_tx_rate = calc_rate(0, HZ, can_stats->tx_frames_delta);
+ can_stats->current_rx_rate = calc_rate(0, HZ, can_stats->rx_frames_delta);
/* check / update maximum values */
- if (can_stats.max_tx_rate < can_stats.current_tx_rate)
- can_stats.max_tx_rate = can_stats.current_tx_rate;
+ if (can_stats->max_tx_rate < can_stats->current_tx_rate)
+ can_stats->max_tx_rate = can_stats->current_tx_rate;
- if (can_stats.max_rx_rate < can_stats.current_rx_rate)
- can_stats.max_rx_rate = can_stats.current_rx_rate;
+ if (can_stats->max_rx_rate < can_stats->current_rx_rate)
+ can_stats->max_rx_rate = can_stats->current_rx_rate;
- if (can_stats.max_rx_match_ratio < can_stats.current_rx_match_ratio)
- can_stats.max_rx_match_ratio = can_stats.current_rx_match_ratio;
+ if (can_stats->max_rx_match_ratio < can_stats->current_rx_match_ratio)
+ can_stats->max_rx_match_ratio = can_stats->current_rx_match_ratio;
/* clear values for 'current rate' calculation */
- can_stats.tx_frames_delta = 0;
- can_stats.rx_frames_delta = 0;
- can_stats.matches_delta = 0;
+ can_stats->tx_frames_delta = 0;
+ can_stats->rx_frames_delta = 0;
+ can_stats->matches_delta = 0;
/* restart timer (one second) */
- mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
+ mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ));
}
/*
@@ -217,57 +210,61 @@ static void can_print_recv_banner(struct seq_file *m)
static int can_stats_proc_show(struct seq_file *m, void *v)
{
+ struct net *net = m->private;
+ struct s_stats *can_stats = net->can.can_stats;
+ struct s_pstats *can_pstats = net->can.can_pstats;
+
seq_putc(m, '\n');
- seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats.tx_frames);
- seq_printf(m, " %8ld received frames (RXF)\n", can_stats.rx_frames);
- seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats.matches);
+ seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats->tx_frames);
+ seq_printf(m, " %8ld received frames (RXF)\n", can_stats->rx_frames);
+ seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats->matches);
seq_putc(m, '\n');
- if (can_stattimer.function == can_stat_update) {
+ if (net->can.can_stattimer.function == can_stat_update) {
seq_printf(m, " %8ld %% total match ratio (RXMR)\n",
- can_stats.total_rx_match_ratio);
+ can_stats->total_rx_match_ratio);
seq_printf(m, " %8ld frames/s total tx rate (TXR)\n",
- can_stats.total_tx_rate);
+ can_stats->total_tx_rate);
seq_printf(m, " %8ld frames/s total rx rate (RXR)\n",
- can_stats.total_rx_rate);
+ can_stats->total_rx_rate);
seq_putc(m, '\n');
seq_printf(m, " %8ld %% current match ratio (CRXMR)\n",
- can_stats.current_rx_match_ratio);
+ can_stats->current_rx_match_ratio);
seq_printf(m, " %8ld frames/s current tx rate (CTXR)\n",
- can_stats.current_tx_rate);
+ can_stats->current_tx_rate);
seq_printf(m, " %8ld frames/s current rx rate (CRXR)\n",
- can_stats.current_rx_rate);
+ can_stats->current_rx_rate);
seq_putc(m, '\n');
seq_printf(m, " %8ld %% max match ratio (MRXMR)\n",
- can_stats.max_rx_match_ratio);
+ can_stats->max_rx_match_ratio);
seq_printf(m, " %8ld frames/s max tx rate (MTXR)\n",
- can_stats.max_tx_rate);
+ can_stats->max_tx_rate);
seq_printf(m, " %8ld frames/s max rx rate (MRXR)\n",
- can_stats.max_rx_rate);
+ can_stats->max_rx_rate);
seq_putc(m, '\n');
}
seq_printf(m, " %8ld current receive list entries (CRCV)\n",
- can_pstats.rcv_entries);
+ can_pstats->rcv_entries);
seq_printf(m, " %8ld maximum receive list entries (MRCV)\n",
- can_pstats.rcv_entries_max);
+ can_pstats->rcv_entries_max);
- if (can_pstats.stats_reset)
+ if (can_pstats->stats_reset)
seq_printf(m, "\n %8ld statistic resets (STR)\n",
- can_pstats.stats_reset);
+ can_pstats->stats_reset);
- if (can_pstats.user_reset)
+ if (can_pstats->user_reset)
seq_printf(m, " %8ld user statistic resets (USTR)\n",
- can_pstats.user_reset);
+ can_pstats->user_reset);
seq_putc(m, '\n');
return 0;
@@ -275,7 +272,7 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
static int can_stats_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_stats_proc_show, NULL);
+ return single_open_net(inode, file, can_stats_proc_show);
}
static const struct file_operations can_stats_proc_fops = {
@@ -288,25 +285,28 @@ static const struct file_operations can_stats_proc_fops = {
static int can_reset_stats_proc_show(struct seq_file *m, void *v)
{
+ struct net *net = m->private;
+ struct s_pstats *can_pstats = net->can.can_pstats;
+ struct s_stats *can_stats = net->can.can_stats;
+
user_reset = 1;
- if (can_stattimer.function == can_stat_update) {
+ if (net->can.can_stattimer.function == can_stat_update) {
seq_printf(m, "Scheduled statistic reset #%ld.\n",
- can_pstats.stats_reset + 1);
-
+ can_pstats->stats_reset + 1);
} else {
- if (can_stats.jiffies_init != jiffies)
- can_init_stats();
+ if (can_stats->jiffies_init != jiffies)
+ can_init_stats(net);
seq_printf(m, "Performed statistic reset #%ld.\n",
- can_pstats.stats_reset);
+ can_pstats->stats_reset);
}
return 0;
}
static int can_reset_stats_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_reset_stats_proc_show, NULL);
+ return single_open_net(inode, file, can_reset_stats_proc_show);
}
static const struct file_operations can_reset_stats_proc_fops = {
@@ -325,7 +325,7 @@ static int can_version_proc_show(struct seq_file *m, void *v)
static int can_version_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_version_proc_show, NULL);
+ return single_open_net(inode, file, can_version_proc_show);
}
static const struct file_operations can_version_proc_fops = {
@@ -351,20 +351,21 @@ static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
static int can_rcvlist_proc_show(struct seq_file *m, void *v)
{
/* double cast to prevent GCC warning */
- int idx = (int)(long)m->private;
+ int idx = (int)(long)PDE_DATA(m->file->f_inode);
struct net_device *dev;
struct dev_rcv_lists *d;
+ struct net *net = m->private;
seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]);
rcu_read_lock();
/* receive list for 'all' CAN devices (dev == NULL) */
- d = &can_rx_alldev_list;
+ d = net->can.can_rx_alldev_list;
can_rcvlist_proc_show_one(m, idx, NULL, d);
/* receive list for registered CAN devices */
- for_each_netdev_rcu(&init_net, dev) {
+ for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv)
can_rcvlist_proc_show_one(m, idx, dev, dev->ml_priv);
}
@@ -377,7 +378,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
static int can_rcvlist_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_rcvlist_proc_show, PDE_DATA(inode));
+ return single_open_net(inode, file, can_rcvlist_proc_show);
}
static const struct file_operations can_rcvlist_proc_fops = {
@@ -417,6 +418,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
struct dev_rcv_lists *d;
+ struct net *net = m->private;
/* RX_SFF */
seq_puts(m, "\nreceive list 'rx_sff':\n");
@@ -424,11 +426,11 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
/* sff receive list for 'all' CAN devices (dev == NULL) */
- d = &can_rx_alldev_list;
+ d = net->can.can_rx_alldev_list;
can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff));
/* sff receive list for registered CAN devices */
- for_each_netdev_rcu(&init_net, dev) {
+ for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv) {
d = dev->ml_priv;
can_rcvlist_proc_show_array(m, dev, d->rx_sff,
@@ -444,7 +446,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
static int can_rcvlist_sff_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_rcvlist_sff_proc_show, NULL);
+ return single_open_net(inode, file, can_rcvlist_sff_proc_show);
}
static const struct file_operations can_rcvlist_sff_proc_fops = {
@@ -460,6 +462,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
struct dev_rcv_lists *d;
+ struct net *net = m->private;
/* RX_EFF */
seq_puts(m, "\nreceive list 'rx_eff':\n");
@@ -467,11 +470,11 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
/* eff receive list for 'all' CAN devices (dev == NULL) */
- d = &can_rx_alldev_list;
+ d = net->can.can_rx_alldev_list;
can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff));
/* eff receive list for registered CAN devices */
- for_each_netdev_rcu(&init_net, dev) {
+ for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv) {
d = dev->ml_priv;
can_rcvlist_proc_show_array(m, dev, d->rx_eff,
@@ -487,7 +490,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_rcvlist_eff_proc_show, NULL);
+ return single_open_net(inode, file, can_rcvlist_eff_proc_show);
}
static const struct file_operations can_rcvlist_eff_proc_fops = {
@@ -499,81 +502,85 @@ static const struct file_operations can_rcvlist_eff_proc_fops = {
};
/*
- * proc utility functions
- */
-
-static void can_remove_proc_readentry(const char *name)
-{
- if (can_dir)
- remove_proc_entry(name, can_dir);
-}
-
-/*
* can_init_proc - create main CAN proc directory and procfs entries
*/
-void can_init_proc(void)
+void can_init_proc(struct net *net)
{
/* create /proc/net/can directory */
- can_dir = proc_mkdir("can", init_net.proc_net);
+ net->can.proc_dir = proc_net_mkdir(net, "can", net->proc_net);
- if (!can_dir) {
- pr_info("can: failed to create /proc/net/can.\n");
+ if (!net->can.proc_dir) {
+ printk(KERN_INFO "can: failed to create /proc/net/can . "
+ "CONFIG_PROC_FS missing?\n");
return;
}
/* own procfs entries from the AF_CAN core */
- pde_version = proc_create(CAN_PROC_VERSION, 0644, can_dir,
- &can_version_proc_fops);
- pde_stats = proc_create(CAN_PROC_STATS, 0644, can_dir,
- &can_stats_proc_fops);
- pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644, can_dir,
- &can_reset_stats_proc_fops);
- pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_ERR);
- pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_ALL);
- pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_FIL);
- pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_INV);
- pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, can_dir,
- &can_rcvlist_eff_proc_fops);
- pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, can_dir,
- &can_rcvlist_sff_proc_fops);
+ net->can.pde_version = proc_create(CAN_PROC_VERSION, 0644,
+ net->can.proc_dir,
+ &can_version_proc_fops);
+ net->can.pde_stats = proc_create(CAN_PROC_STATS, 0644,
+ net->can.proc_dir,
+ &can_stats_proc_fops);
+ net->can.pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644,
+ net->can.proc_dir,
+ &can_reset_stats_proc_fops);
+ net->can.pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_proc_fops,
+ (void *)RX_ERR);
+ net->can.pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_proc_fops,
+ (void *)RX_ALL);
+ net->can.pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_proc_fops,
+ (void *)RX_FIL);
+ net->can.pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_proc_fops,
+ (void *)RX_INV);
+ net->can.pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_eff_proc_fops);
+ net->can.pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_sff_proc_fops);
}
/*
* can_remove_proc - remove procfs entries and main CAN proc directory
*/
-void can_remove_proc(void)
+void can_remove_proc(struct net *net)
{
- if (pde_version)
- can_remove_proc_readentry(CAN_PROC_VERSION);
+ if (net->can.pde_version)
+ remove_proc_entry(CAN_PROC_VERSION, net->can.proc_dir);
- if (pde_stats)
- can_remove_proc_readentry(CAN_PROC_STATS);
+ if (net->can.pde_stats)
+ remove_proc_entry(CAN_PROC_STATS, net->can.proc_dir);
- if (pde_reset_stats)
- can_remove_proc_readentry(CAN_PROC_RESET_STATS);
+ if (net->can.pde_reset_stats)
+ remove_proc_entry(CAN_PROC_RESET_STATS, net->can.proc_dir);
- if (pde_rcvlist_err)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_ERR);
+ if (net->can.pde_rcvlist_err)
+ remove_proc_entry(CAN_PROC_RCVLIST_ERR, net->can.proc_dir);
- if (pde_rcvlist_all)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_ALL);
+ if (net->can.pde_rcvlist_all)
+ remove_proc_entry(CAN_PROC_RCVLIST_ALL, net->can.proc_dir);
- if (pde_rcvlist_fil)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_FIL);
+ if (net->can.pde_rcvlist_fil)
+ remove_proc_entry(CAN_PROC_RCVLIST_FIL, net->can.proc_dir);
- if (pde_rcvlist_inv)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_INV);
+ if (net->can.pde_rcvlist_inv)
+ remove_proc_entry(CAN_PROC_RCVLIST_INV, net->can.proc_dir);
- if (pde_rcvlist_eff)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_EFF);
+ if (net->can.pde_rcvlist_eff)
+ remove_proc_entry(CAN_PROC_RCVLIST_EFF, net->can.proc_dir);
- if (pde_rcvlist_sff)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_SFF);
+ if (net->can.pde_rcvlist_sff)
+ remove_proc_entry(CAN_PROC_RCVLIST_SFF, net->can.proc_dir);
- if (can_dir)
- remove_proc_entry("can", init_net.proc_net);
+ if (net->can.proc_dir)
+ remove_proc_entry("can", net->proc_net);
}
diff --git a/net/can/raw.c b/net/can/raw.c
index 6dc546a06673..864c80dbdb72 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -181,20 +181,21 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
kfree_skb(skb);
}
-static int raw_enable_filters(struct net_device *dev, struct sock *sk,
- struct can_filter *filter, int count)
+static int raw_enable_filters(struct net *net, struct net_device *dev,
+ struct sock *sk, struct can_filter *filter,
+ int count)
{
int err = 0;
int i;
for (i = 0; i < count; i++) {
- err = can_rx_register(dev, filter[i].can_id,
+ err = can_rx_register(net, dev, filter[i].can_id,
filter[i].can_mask,
raw_rcv, sk, "raw", sk);
if (err) {
/* clean up successfully registered filters */
while (--i >= 0)
- can_rx_unregister(dev, filter[i].can_id,
+ can_rx_unregister(net, dev, filter[i].can_id,
filter[i].can_mask,
raw_rcv, sk);
break;
@@ -204,57 +205,62 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk,
return err;
}
-static int raw_enable_errfilter(struct net_device *dev, struct sock *sk,
- can_err_mask_t err_mask)
+static int raw_enable_errfilter(struct net *net, struct net_device *dev,
+ struct sock *sk, can_err_mask_t err_mask)
{
int err = 0;
if (err_mask)
- err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG,
+ err = can_rx_register(net, dev, 0, err_mask | CAN_ERR_FLAG,
raw_rcv, sk, "raw", sk);
return err;
}
-static void raw_disable_filters(struct net_device *dev, struct sock *sk,
- struct can_filter *filter, int count)
+static void raw_disable_filters(struct net *net, struct net_device *dev,
+ struct sock *sk, struct can_filter *filter,
+ int count)
{
int i;
for (i = 0; i < count; i++)
- can_rx_unregister(dev, filter[i].can_id, filter[i].can_mask,
- raw_rcv, sk);
+ can_rx_unregister(net, dev, filter[i].can_id,
+ filter[i].can_mask, raw_rcv, sk);
}
-static inline void raw_disable_errfilter(struct net_device *dev,
+static inline void raw_disable_errfilter(struct net *net,
+ struct net_device *dev,
struct sock *sk,
can_err_mask_t err_mask)
{
if (err_mask)
- can_rx_unregister(dev, 0, err_mask | CAN_ERR_FLAG,
+ can_rx_unregister(net, dev, 0, err_mask | CAN_ERR_FLAG,
raw_rcv, sk);
}
-static inline void raw_disable_allfilters(struct net_device *dev,
+static inline void raw_disable_allfilters(struct net *net,
+ struct net_device *dev,
struct sock *sk)
{
struct raw_sock *ro = raw_sk(sk);
- raw_disable_filters(dev, sk, ro->filter, ro->count);
- raw_disable_errfilter(dev, sk, ro->err_mask);
+ raw_disable_filters(net, dev, sk, ro->filter, ro->count);
+ raw_disable_errfilter(net, dev, sk, ro->err_mask);
}
-static int raw_enable_allfilters(struct net_device *dev, struct sock *sk)
+static int raw_enable_allfilters(struct net *net, struct net_device *dev,
+ struct sock *sk)
{
struct raw_sock *ro = raw_sk(sk);
int err;
- err = raw_enable_filters(dev, sk, ro->filter, ro->count);
+ err = raw_enable_filters(net, dev, sk, ro->filter, ro->count);
if (!err) {
- err = raw_enable_errfilter(dev, sk, ro->err_mask);
+ err = raw_enable_errfilter(net, dev, sk, ro->err_mask);
if (err)
- raw_disable_filters(dev, sk, ro->filter, ro->count);
+ raw_disable_filters(net, dev, sk, ro->filter,
+ ro->count);
}
return err;
@@ -267,7 +273,7 @@ static int raw_notifier(struct notifier_block *nb,
struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
struct sock *sk = &ro->sk;
- if (!net_eq(dev_net(dev), &init_net))
+ if (!net_eq(dev_net(dev), sock_net(sk)))
return NOTIFY_DONE;
if (dev->type != ARPHRD_CAN)
@@ -282,7 +288,7 @@ static int raw_notifier(struct notifier_block *nb,
lock_sock(sk);
/* remove current filters & unregister */
if (ro->bound)
- raw_disable_allfilters(dev, sk);
+ raw_disable_allfilters(dev_net(dev), dev, sk);
if (ro->count > 1)
kfree(ro->filter);
@@ -358,13 +364,13 @@ static int raw_release(struct socket *sock)
if (ro->ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, ro->ifindex);
+ dev = dev_get_by_index(sock_net(sk), ro->ifindex);
if (dev) {
- raw_disable_allfilters(dev, sk);
+ raw_disable_allfilters(dev_net(dev), dev, sk);
dev_put(dev);
}
} else
- raw_disable_allfilters(NULL, sk);
+ raw_disable_allfilters(sock_net(sk), NULL, sk);
}
if (ro->count > 1)
@@ -404,7 +410,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (addr->can_ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, addr->can_ifindex);
+ dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
if (!dev) {
err = -ENODEV;
goto out;
@@ -420,13 +426,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
ifindex = dev->ifindex;
/* filters set by default/setsockopt */
- err = raw_enable_allfilters(dev, sk);
+ err = raw_enable_allfilters(sock_net(sk), dev, sk);
dev_put(dev);
} else {
ifindex = 0;
/* filters set by default/setsockopt */
- err = raw_enable_allfilters(NULL, sk);
+ err = raw_enable_allfilters(sock_net(sk), NULL, sk);
}
if (!err) {
@@ -435,13 +441,15 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (ro->ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, ro->ifindex);
+ dev = dev_get_by_index(sock_net(sk),
+ ro->ifindex);
if (dev) {
- raw_disable_allfilters(dev, sk);
+ raw_disable_allfilters(dev_net(dev),
+ dev, sk);
dev_put(dev);
}
} else
- raw_disable_allfilters(NULL, sk);
+ raw_disable_allfilters(sock_net(sk), NULL, sk);
}
ro->ifindex = ifindex;
ro->bound = 1;
@@ -517,15 +525,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
if (ro->bound && ro->ifindex)
- dev = dev_get_by_index(&init_net, ro->ifindex);
+ dev = dev_get_by_index(sock_net(sk), ro->ifindex);
if (ro->bound) {
/* (try to) register the new filters */
if (count == 1)
- err = raw_enable_filters(dev, sk, &sfilter, 1);
+ err = raw_enable_filters(sock_net(sk), dev, sk,
+ &sfilter, 1);
else
- err = raw_enable_filters(dev, sk, filter,
- count);
+ err = raw_enable_filters(sock_net(sk), dev, sk,
+ filter, count);
if (err) {
if (count > 1)
kfree(filter);
@@ -533,7 +542,8 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
}
/* remove old filter registrations */
- raw_disable_filters(dev, sk, ro->filter, ro->count);
+ raw_disable_filters(sock_net(sk), dev, sk, ro->filter,
+ ro->count);
}
/* remove old filter space */
@@ -569,18 +579,20 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
if (ro->bound && ro->ifindex)
- dev = dev_get_by_index(&init_net, ro->ifindex);
+ dev = dev_get_by_index(sock_net(sk), ro->ifindex);
/* remove current error mask */
if (ro->bound) {
/* (try to) register the new err_mask */
- err = raw_enable_errfilter(dev, sk, err_mask);
+ err = raw_enable_errfilter(sock_net(sk), dev, sk,
+ err_mask);
if (err)
goto out_err;
/* remove old err_mask registration */
- raw_disable_errfilter(dev, sk, ro->err_mask);
+ raw_disable_errfilter(sock_net(sk), dev, sk,
+ ro->err_mask);
}
/* link new err_mask to the socket */
@@ -741,7 +753,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
return -EINVAL;
}
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(sock_net(sk), ifindex);
if (!dev)
return -ENXIO;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index f4947e737f34..15ef99469cfe 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -256,8 +256,12 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
- } while (sk_can_busy_loop(sk) &&
- sk_busy_loop(sk, flags & MSG_DONTWAIT));
+
+ if (!sk_can_busy_loop(sk))
+ break;
+
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
+ } while (!skb_queue_empty(&sk->sk_receive_queue));
error = -EAGAIN;
diff --git a/net/core/dev.c b/net/core/dev.c
index 9b5875388c23..3361ee87fcc2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -95,6 +95,7 @@
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/busy_poll.h>
@@ -2975,6 +2976,9 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
__skb_linearize(skb))
goto out_kfree_skb;
+ if (validate_xmit_xfrm(skb, features))
+ goto out_kfree_skb;
+
/* If packet is not checksummed and device does not
* support checksumming for this protocol, complete
* checksumming here.
@@ -3444,6 +3448,7 @@ EXPORT_SYMBOL(netdev_max_backlog);
int netdev_tstamp_prequeue __read_mostly = 1;
int netdev_budget __read_mostly = 300;
+unsigned int __read_mostly netdev_budget_usecs = 2000;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
@@ -4250,6 +4255,125 @@ static int __netif_receive_skb(struct sk_buff *skb)
return ret;
}
+static struct static_key generic_xdp_needed __read_mostly;
+
+static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+{
+ struct bpf_prog *new = xdp->prog;
+ int ret = 0;
+
+ switch (xdp->command) {
+ case XDP_SETUP_PROG: {
+ struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
+
+ rcu_assign_pointer(dev->xdp_prog, new);
+ if (old)
+ bpf_prog_put(old);
+
+ if (old && !new) {
+ static_key_slow_dec(&generic_xdp_needed);
+ } else if (new && !old) {
+ static_key_slow_inc(&generic_xdp_needed);
+ dev_disable_lro(dev);
+ }
+ break;
+ }
+
+ case XDP_QUERY_PROG:
+ xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog);
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
+{
+ struct xdp_buff xdp;
+ u32 act = XDP_DROP;
+ void *orig_data;
+ int hlen, off;
+ u32 mac_len;
+
+ /* Reinjected packets coming from act_mirred or similar should
+ * not get XDP generic processing.
+ */
+ if (skb_cloned(skb))
+ return XDP_PASS;
+
+ if (skb_linearize(skb))
+ goto do_drop;
+
+ /* The XDP program wants to see the packet starting at the MAC
+ * header.
+ */
+ mac_len = skb->data - skb_mac_header(skb);
+ hlen = skb_headlen(skb) + mac_len;
+ xdp.data = skb->data - mac_len;
+ xdp.data_end = xdp.data + hlen;
+ xdp.data_hard_start = skb->data - skb_headroom(skb);
+ orig_data = xdp.data;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ off = xdp.data - orig_data;
+ if (off > 0)
+ __skb_pull(skb, off);
+ else if (off < 0)
+ __skb_push(skb, -off);
+
+ switch (act) {
+ case XDP_TX:
+ __skb_push(skb, mac_len);
+ /* fall through */
+ case XDP_PASS:
+ break;
+
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fall through */
+ case XDP_ABORTED:
+ trace_xdp_exception(skb->dev, xdp_prog, act);
+ /* fall through */
+ case XDP_DROP:
+ do_drop:
+ kfree_skb(skb);
+ break;
+ }
+
+ return act;
+}
+
+/* When doing generic XDP we have to bypass the qdisc layer and the
+ * network taps in order to match in-driver-XDP behavior.
+ */
+static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
+{
+ struct net_device *dev = skb->dev;
+ struct netdev_queue *txq;
+ bool free_skb = true;
+ int cpu, rc;
+
+ txq = netdev_pick_tx(dev, skb, NULL);
+ cpu = smp_processor_id();
+ HARD_TX_LOCK(dev, txq, cpu);
+ if (!netif_xmit_stopped(txq)) {
+ rc = netdev_start_xmit(skb, dev, txq, 0);
+ if (dev_xmit_complete(rc))
+ free_skb = false;
+ }
+ HARD_TX_UNLOCK(dev, txq);
+ if (free_skb) {
+ trace_xdp_exception(dev, xdp_prog, XDP_TX);
+ kfree_skb(skb);
+ }
+}
+
static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
@@ -4261,6 +4385,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
rcu_read_lock();
+ if (static_key_false(&generic_xdp_needed)) {
+ struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+
+ if (xdp_prog) {
+ u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+
+ if (act != XDP_PASS) {
+ rcu_read_unlock();
+ if (act == XDP_TX)
+ generic_xdp_tx(skb, xdp_prog);
+ return NET_RX_DROP;
+ }
+ }
+ }
+
#ifdef CONFIG_RPS
if (static_key_false(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4493,7 +4632,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
enum gro_result ret;
int grow;
- if (!(skb->dev->features & NETIF_F_GRO))
+ if (netif_elide_gro(skb->dev))
goto normal;
if (skb->csum_bad)
@@ -5063,27 +5202,28 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
do_softirq();
}
-bool sk_busy_loop(struct sock *sk, int nonblock)
+void napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg)
{
- unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+ unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
int (*napi_poll)(struct napi_struct *napi, int budget);
void *have_poll_lock = NULL;
struct napi_struct *napi;
- int rc;
restart:
- rc = false;
napi_poll = NULL;
rcu_read_lock();
- napi = napi_by_id(sk->sk_napi_id);
+ napi = napi_by_id(napi_id);
if (!napi)
goto out;
preempt_disable();
for (;;) {
- rc = 0;
+ int work = 0;
+
local_bh_disable();
if (!napi_poll) {
unsigned long val = READ_ONCE(napi->state);
@@ -5101,16 +5241,15 @@ restart:
have_poll_lock = netpoll_poll_lock(napi);
napi_poll = napi->poll;
}
- rc = napi_poll(napi, BUSY_POLL_BUDGET);
- trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+ work = napi_poll(napi, BUSY_POLL_BUDGET);
+ trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
count:
- if (rc > 0)
- __NET_ADD_STATS(sock_net(sk),
- LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+ if (work > 0)
+ __NET_ADD_STATS(dev_net(napi->dev),
+ LINUX_MIB_BUSYPOLLRXPACKETS, work);
local_bh_enable();
- if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) ||
- busy_loop_timeout(end_time))
+ if (!loop_end || loop_end(loop_end_arg, start_time))
break;
if (unlikely(need_resched())) {
@@ -5119,9 +5258,8 @@ count:
preempt_enable();
rcu_read_unlock();
cond_resched();
- rc = !skb_queue_empty(&sk->sk_receive_queue);
- if (rc || busy_loop_timeout(end_time))
- return rc;
+ if (loop_end(loop_end_arg, start_time))
+ return;
goto restart;
}
cpu_relax();
@@ -5129,12 +5267,10 @@ count:
if (napi_poll)
busy_poll_stop(napi, have_poll_lock);
preempt_enable();
- rc = !skb_queue_empty(&sk->sk_receive_queue);
out:
rcu_read_unlock();
- return rc;
}
-EXPORT_SYMBOL(sk_busy_loop);
+EXPORT_SYMBOL(napi_busy_loop);
#endif /* CONFIG_NET_RX_BUSY_POLL */
@@ -5146,10 +5282,10 @@ static void napi_hash_add(struct napi_struct *napi)
spin_lock(&napi_hash_lock);
- /* 0..NR_CPUS+1 range is reserved for sender_cpu use */
+ /* 0..NR_CPUS range is reserved for sender_cpu use */
do {
- if (unlikely(++napi_gen_id < NR_CPUS + 1))
- napi_gen_id = NR_CPUS + 1;
+ if (unlikely(++napi_gen_id < MIN_NAPI_ID))
+ napi_gen_id = MIN_NAPI_ID;
} while (napi_by_id(napi_gen_id));
napi->napi_id = napi_gen_id;
@@ -5313,7 +5449,8 @@ out_unlock:
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
- unsigned long time_limit = jiffies + 2;
+ unsigned long time_limit = jiffies +
+ usecs_to_jiffies(netdev_budget_usecs);
int budget = netdev_budget;
LIST_HEAD(list);
LIST_HEAD(repoll);
@@ -6724,6 +6861,7 @@ EXPORT_SYMBOL(dev_change_proto_down);
*/
int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
{
+ int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp);
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
struct netdev_xdp xdp;
@@ -6731,14 +6869,16 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
ASSERT_RTNL();
- if (!ops->ndo_xdp)
- return -EOPNOTSUPP;
+ xdp_op = ops->ndo_xdp;
+ if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
+ xdp_op = generic_xdp_install;
+
if (fd >= 0) {
if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
memset(&xdp, 0, sizeof(xdp));
xdp.command = XDP_QUERY_PROG;
- err = ops->ndo_xdp(dev, &xdp);
+ err = xdp_op(dev, &xdp);
if (err < 0)
return err;
if (xdp.prog_attached)
@@ -6754,7 +6894,7 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
xdp.command = XDP_SETUP_PROG;
xdp.prog = prog;
- err = ops->ndo_xdp(dev, &xdp);
+ err = xdp_op(dev, &xdp);
if (err < 0 && prog)
bpf_prog_put(prog);
@@ -7794,6 +7934,7 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
+ struct bpf_prog *prog;
might_sleep();
netif_free_tx_queues(dev);
@@ -7812,6 +7953,12 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
+ prog = rcu_dereference_protected(dev->xdp_prog, 1);
+ if (prog) {
+ bpf_prog_put(prog);
+ static_key_slow_dec(&generic_xdp_needed);
+ }
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
netdev_freemem(dev);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index e9c1e6acfb6d..b0b87a292e7c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1397,10 +1397,10 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
u32 seq, int flags)
{
const struct devlink_ops *ops = devlink->ops;
+ u8 inline_mode, encap_mode;
void *hdr;
int err = 0;
u16 mode;
- u8 inline_mode;
hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
if (!hdr)
@@ -1429,6 +1429,15 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
goto nla_put_failure;
}
+ if (ops->eswitch_encap_mode_get) {
+ err = ops->eswitch_encap_mode_get(devlink, &encap_mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
genlmsg_end(msg, hdr);
return 0;
@@ -1468,9 +1477,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
{
struct devlink *devlink = info->user_ptr[0];
const struct devlink_ops *ops = devlink->ops;
- u16 mode;
- u8 inline_mode;
+ u8 inline_mode, encap_mode;
int err = 0;
+ u16 mode;
if (!ops)
return -EOPNOTSUPP;
@@ -1494,7 +1503,695 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
return err;
}
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
+ if (!ops->eswitch_encap_mode_set)
+ return -EOPNOTSUPP;
+ encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
+ err = ops->eswitch_encap_mode_set(devlink, encap_mode);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int devlink_dpipe_match_put(struct sk_buff *skb,
+ struct devlink_dpipe_match *match)
+{
+ struct devlink_dpipe_header *header = match->header;
+ struct devlink_dpipe_field *field = &header->fields[match->field_id];
+ struct nlattr *match_attr;
+
+ match_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_MATCH);
+ if (!match_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, match_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, match_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_match_put);
+
+static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table,
+ struct sk_buff *skb)
+{
+ struct nlattr *matches_attr;
+
+ matches_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
+ if (!matches_attr)
+ return -EMSGSIZE;
+
+ if (table->table_ops->matches_dump(table->priv, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, matches_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, matches_attr);
+ return -EMSGSIZE;
+}
+
+int devlink_dpipe_action_put(struct sk_buff *skb,
+ struct devlink_dpipe_action *action)
+{
+ struct devlink_dpipe_header *header = action->header;
+ struct devlink_dpipe_field *field = &header->fields[action->field_id];
+ struct nlattr *action_attr;
+
+ action_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ACTION);
+ if (!action_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, action_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, action_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_action_put);
+
+static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table,
+ struct sk_buff *skb)
+{
+ struct nlattr *actions_attr;
+
+ actions_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
+ if (!actions_attr)
+ return -EMSGSIZE;
+
+ if (table->table_ops->actions_dump(table->priv, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, actions_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, actions_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_table_put(struct sk_buff *skb,
+ struct devlink_dpipe_table *table)
+{
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
+ table->counters_enabled))
+ goto nla_put_failure;
+
+ if (devlink_dpipe_matches_put(table, skb))
+ goto nla_put_failure;
+
+ if (devlink_dpipe_actions_put(table, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, table_attr);
return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, table_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb,
+ struct genl_info *info)
+{
+ int err;
+
+ if (*pskb) {
+ err = genlmsg_reply(*pskb, info);
+ if (err)
+ return err;
+ }
+ *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!*pskb)
+ return -ENOMEM;
+ return 0;
+}
+
+static int devlink_dpipe_tables_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct list_head *dpipe_tables,
+ const char *table_name)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_dpipe_table *table;
+ struct nlattr *tables_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ bool incomplete;
+ void *hdr;
+ int i;
+ int err;
+
+ table = list_first_entry(dpipe_tables,
+ struct devlink_dpipe_table, list);
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+ tables_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLES);
+ if (!tables_attr)
+ goto nla_put_failure;
+
+ i = 0;
+ incomplete = false;
+ list_for_each_entry_from(table, dpipe_tables, list) {
+ if (!table_name) {
+ err = devlink_dpipe_table_put(skb, table);
+ if (err) {
+ if (!i)
+ goto err_table_put;
+ incomplete = true;
+ break;
+ }
+ } else {
+ if (!strcmp(table->name, table_name)) {
+ err = devlink_dpipe_table_put(skb, table);
+ if (err)
+ break;
+ }
+ }
+ i++;
+ }
+
+ nla_nest_end(skb, tables_attr);
+ genlmsg_end(skb, hdr);
+ if (incomplete)
+ goto start_again;
+
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_table_put:
+err_skb_send_alloc:
+ genlmsg_cancel(skb, hdr);
+ nlmsg_free(skb);
+ return err;
+}
+
+static int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *table_name = NULL;
+
+ if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+
+ return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0,
+ &devlink->dpipe_table_list,
+ table_name);
+}
+
+static int devlink_dpipe_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE,
+ value->value_size, value->value))
+ return -EMSGSIZE;
+ if (value->mask)
+ if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK,
+ value->value_size, value->mask))
+ return -EMSGSIZE;
+ if (value->mapping_valid)
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING,
+ value->mapping_value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_action_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (!value->action)
+ return -EINVAL;
+ if (devlink_dpipe_action_put(skb, value->action))
+ return -EMSGSIZE;
+ if (devlink_dpipe_value_put(skb, value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_action_values_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *values,
+ unsigned int values_count)
+{
+ struct nlattr *action_attr;
+ int i;
+ int err;
+
+ for (i = 0; i < values_count; i++) {
+ action_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_ACTION_VALUE);
+ if (!action_attr)
+ return -EMSGSIZE;
+ err = devlink_dpipe_action_value_put(skb, &values[i]);
+ if (err)
+ goto err_action_value_put;
+ nla_nest_end(skb, action_attr);
+ }
+ return 0;
+
+err_action_value_put:
+ nla_nest_cancel(skb, action_attr);
+ return err;
+}
+
+static int devlink_dpipe_match_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (!value->match)
+ return -EINVAL;
+ if (devlink_dpipe_match_put(skb, value->match))
+ return -EMSGSIZE;
+ if (devlink_dpipe_value_put(skb, value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_match_values_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *values,
+ unsigned int values_count)
+{
+ struct nlattr *match_attr;
+ int i;
+ int err;
+
+ for (i = 0; i < values_count; i++) {
+ match_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_MATCH_VALUE);
+ if (!match_attr)
+ return -EMSGSIZE;
+ err = devlink_dpipe_match_value_put(skb, &values[i]);
+ if (err)
+ goto err_match_value_put;
+ nla_nest_end(skb, match_attr);
+ }
+ return 0;
+
+err_match_value_put:
+ nla_nest_cancel(skb, match_attr);
+ return err;
+}
+
+static int devlink_dpipe_entry_put(struct sk_buff *skb,
+ struct devlink_dpipe_entry *entry)
+{
+ struct nlattr *entry_attr, *matches_attr, *actions_attr;
+ int err;
+
+ entry_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (entry->counter_valid)
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
+ entry->counter, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ matches_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
+ if (!matches_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_match_values_put(skb, entry->match_values,
+ entry->match_values_count);
+ if (err) {
+ nla_nest_cancel(skb, matches_attr);
+ goto err_match_values_put;
+ }
+ nla_nest_end(skb, matches_attr);
+
+ actions_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
+ if (!actions_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_action_values_put(skb, entry->action_values,
+ entry->action_values_count);
+ if (err) {
+ nla_nest_cancel(skb, actions_attr);
+ goto err_action_values_put;
+ }
+ nla_nest_end(skb, actions_attr);
+
+ nla_nest_end(skb, entry_attr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_match_values_put:
+err_action_values_put:
+ nla_nest_cancel(skb, entry_attr);
+ return err;
+}
+
+static struct devlink_dpipe_table *
+devlink_dpipe_table_find(struct list_head *dpipe_tables,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+
+ list_for_each_entry_rcu(table, dpipe_tables, list) {
+ if (!strcmp(table->name, table_name))
+ return table;
+ }
+ return NULL;
+}
+
+int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct devlink *devlink;
+ int err;
+
+ err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb,
+ dump_ctx->info);
+ if (err)
+ return err;
+
+ dump_ctx->hdr = genlmsg_put(dump_ctx->skb,
+ dump_ctx->info->snd_portid,
+ dump_ctx->info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI,
+ dump_ctx->cmd);
+ if (!dump_ctx->hdr)
+ goto nla_put_failure;
+
+ devlink = dump_ctx->info->user_ptr[0];
+ if (devlink_nl_put_handle(dump_ctx->skb, devlink))
+ goto nla_put_failure;
+ dump_ctx->nest = nla_nest_start(dump_ctx->skb,
+ DEVLINK_ATTR_DPIPE_ENTRIES);
+ if (!dump_ctx->nest)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(dump_ctx->skb, dump_ctx->hdr);
+ nlmsg_free(dump_ctx->skb);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare);
+
+int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
+ struct devlink_dpipe_entry *entry)
+{
+ return devlink_dpipe_entry_put(dump_ctx->skb, entry);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append);
+
+int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ nla_nest_end(dump_ctx->skb, dump_ctx->nest);
+ genlmsg_end(dump_ctx->skb, dump_ctx->hdr);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
+
+static int devlink_dpipe_entries_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct devlink_dpipe_table *table)
+{
+ struct devlink_dpipe_dump_ctx dump_ctx;
+ struct nlmsghdr *nlh;
+ int err;
+
+ dump_ctx.skb = NULL;
+ dump_ctx.cmd = cmd;
+ dump_ctx.info = info;
+
+ err = table->table_ops->entries_dump(table->priv,
+ table->counters_enabled,
+ &dump_ctx);
+ if (err)
+ goto err_entries_dump;
+
+send_done:
+ nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+ return genlmsg_reply(dump_ctx.skb, info);
+
+err_entries_dump:
+err_skb_send_alloc:
+ genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr);
+ nlmsg_free(dump_ctx.skb);
+ return err;
+}
+
+static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_dpipe_table *table;
+ const char *table_name;
+
+ if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+ return -EINVAL;
+
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table)
+ return -EINVAL;
+
+ if (!table->table_ops->entries_dump)
+ return -EINVAL;
+
+ return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ 0, table);
+}
+
+static int devlink_dpipe_fields_put(struct sk_buff *skb,
+ const struct devlink_dpipe_header *header)
+{
+ struct devlink_dpipe_field *field;
+ struct nlattr *field_attr;
+ int i;
+
+ for (i = 0; i < header->fields_count; i++) {
+ field = &header->fields[i];
+ field_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_FIELD);
+ if (!field_attr)
+ return -EMSGSIZE;
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type))
+ goto nla_put_failure;
+ nla_nest_end(skb, field_attr);
+ }
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, field_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_header_put(struct sk_buff *skb,
+ struct devlink_dpipe_header *header)
+{
+ struct nlattr *fields_attr, *header_attr;
+ int err;
+
+ header_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER);
+ if (!header_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ fields_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
+ if (!fields_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_fields_put(skb, header);
+ if (err) {
+ nla_nest_cancel(skb, fields_attr);
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, fields_attr);
+ nla_nest_end(skb, header_attr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+ nla_nest_cancel(skb, header_attr);
+ return err;
+}
+
+static int devlink_dpipe_headers_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct devlink_dpipe_headers *
+ dpipe_headers)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct nlattr *headers_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ void *hdr;
+ int i, j;
+ int err;
+
+ i = 0;
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+ headers_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADERS);
+ if (!headers_attr)
+ goto nla_put_failure;
+
+ j = 0;
+ for (; i < dpipe_headers->headers_count; i++) {
+ err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]);
+ if (err) {
+ if (!j)
+ goto err_table_put;
+ break;
+ }
+ j++;
+ }
+ nla_nest_end(skb, headers_attr);
+ genlmsg_end(skb, hdr);
+ if (i != dpipe_headers->headers_count)
+ goto start_again;
+
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_table_put:
+err_skb_send_alloc:
+ genlmsg_cancel(skb, hdr);
+ nlmsg_free(skb);
+ return err;
+}
+
+static int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (!devlink->dpipe_headers)
+ return -EOPNOTSUPP;
+ return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET,
+ 0, devlink->dpipe_headers);
+}
+
+static int devlink_dpipe_table_counters_set(struct devlink *devlink,
+ const char *table_name,
+ bool enable)
+{
+ struct devlink_dpipe_table *table;
+
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table)
+ return -EINVAL;
+
+ if (table->counter_control_extern)
+ return -EOPNOTSUPP;
+
+ if (!(table->counters_enabled ^ enable))
+ return 0;
+
+ table->counters_enabled = enable;
+ if (table->table_ops->counters_set_update)
+ table->table_ops->counters_set_update(table->priv, enable);
+ return 0;
+}
+
+static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *table_name;
+ bool counters_enable;
+
+ if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME] ||
+ !info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED])
+ return -EINVAL;
+
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+ counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]);
+
+ return devlink_dpipe_table_counters_set(devlink, table_name,
+ counters_enable);
}
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
@@ -1512,6 +2209,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
[DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -1644,6 +2344,34 @@ static const struct genl_ops devlink_nl_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
+ .doit = devlink_nl_cmd_dpipe_table_get,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ .doit = devlink_nl_cmd_dpipe_entries_get,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
+ .doit = devlink_nl_cmd_dpipe_headers_get,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
+ .doit = devlink_nl_cmd_dpipe_table_counters_set,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -1680,6 +2408,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
devlink_net_set(devlink, &init_net);
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->sb_list);
+ INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
return devlink;
}
EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -1880,6 +2609,133 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
}
EXPORT_SYMBOL_GPL(devlink_sb_unregister);
+/**
+ * devlink_dpipe_headers_register - register dpipe headers
+ *
+ * @devlink: devlink
+ * @dpipe_headers: dpipe header array
+ *
+ * Register the headers supported by hardware.
+ */
+int devlink_dpipe_headers_register(struct devlink *devlink,
+ struct devlink_dpipe_headers *dpipe_headers)
+{
+ mutex_lock(&devlink_mutex);
+ devlink->dpipe_headers = dpipe_headers;
+ mutex_unlock(&devlink_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
+
+/**
+ * devlink_dpipe_headers_unregister - unregister dpipe headers
+ *
+ * @devlink: devlink
+ *
+ * Unregister the headers supported by hardware.
+ */
+void devlink_dpipe_headers_unregister(struct devlink *devlink)
+{
+ mutex_lock(&devlink_mutex);
+ devlink->dpipe_headers = NULL;
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister);
+
+/**
+ * devlink_dpipe_table_counter_enabled - check if counter allocation
+ * required
+ * @devlink: devlink
+ * @table_name: tables name
+ *
+ * Used by driver to check if counter allocation is required.
+ * After counter allocation is turned on the table entries
+ * are updated to include counter statistics.
+ *
+ * After that point on the driver must respect the counter
+ * state so that each entry added to the table is added
+ * with a counter.
+ */
+bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+ bool enabled;
+
+ rcu_read_lock();
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ enabled = false;
+ if (table)
+ enabled = table->counters_enabled;
+ rcu_read_unlock();
+ return enabled;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
+
+/**
+ * devlink_dpipe_table_register - register dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ * @table_ops: table ops
+ * @priv: priv
+ * @size: size
+ * @counter_control_extern: external control for counters
+ */
+int devlink_dpipe_table_register(struct devlink *devlink,
+ const char *table_name,
+ struct devlink_dpipe_table_ops *table_ops,
+ void *priv, u64 size,
+ bool counter_control_extern)
+{
+ struct devlink_dpipe_table *table;
+
+ if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
+ return -EEXIST;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ table->name = table_name;
+ table->table_ops = table_ops;
+ table->priv = priv;
+ table->size = size;
+ table->counter_control_extern = counter_control_extern;
+
+ mutex_lock(&devlink_mutex);
+ list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+ mutex_unlock(&devlink_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
+
+/**
+ * devlink_dpipe_table_unregister - unregister dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ */
+void devlink_dpipe_table_unregister(struct devlink *devlink,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+
+ mutex_lock(&devlink_mutex);
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table)
+ goto unlock;
+ list_del_rcu(&table->list);
+ mutex_unlock(&devlink_mutex);
+ kfree_rcu(table, rcu);
+ return;
+unlock:
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
+
static int __init devlink_module_init(void)
{
return genl_register_family(&devlink_nl_family);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index fb55327dcfea..70ccda233bd1 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -412,9 +412,8 @@ static int __init init_net_drop_monitor(void)
for_each_possible_cpu(cpu) {
data = &per_cpu(dm_cpu_data, cpu);
INIT_WORK(&data->dm_alert_work, send_dm_alert);
- init_timer(&data->send_timer);
- data->send_timer.data = (unsigned long)data;
- data->send_timer.function = sched_send_work;
+ setup_timer(&data->send_timer, sched_send_work,
+ (unsigned long)data);
spin_lock_init(&data->lock);
reset_per_cpu_data(data);
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index aecb2c7241b6..03111a2d6653 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -90,6 +90,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
+ [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
@@ -103,12 +104,15 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_RXALL_BIT] = "rx-all",
[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
[NETIF_F_HW_TC_BIT] = "hw-tc-offload",
+ [NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
+ [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
};
static const char
rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
[ETH_RSS_HASH_TOP_BIT] = "toeplitz",
[ETH_RSS_HASH_XOR_BIT] = "xor",
+ [ETH_RSS_HASH_CRC32_BIT] = "crc32",
};
static const char
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index b6791d94841d..c58c1df6f92b 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -23,6 +23,20 @@ static const struct fib_kuid_range fib_kuid_range_unset = {
KUIDT_INIT(~0),
};
+bool fib_rule_matchall(const struct fib_rule *rule)
+{
+ if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id ||
+ rule->flags)
+ return false;
+ if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1)
+ return false;
+ if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
+ !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fib_rule_matchall);
+
int fib_default_rule_add(struct fib_rules_ops *ops,
u32 pref, u32 table, u32 flags)
{
@@ -354,7 +368,8 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
return 0;
}
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -372,7 +387,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
goto errout;
}
- err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+ err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
if (err < 0)
goto errout;
@@ -547,7 +562,8 @@ errout:
}
EXPORT_SYMBOL_GPL(fib_nl_newrule);
-int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
+int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -566,7 +582,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
goto errout;
}
- err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+ err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
if (err < 0)
goto errout;
diff --git a/net/core/filter.c b/net/core/filter.c
index ebaeaf2e46e8..9a37860a80fc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
+#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
@@ -52,6 +53,7 @@
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
+#include <net/busy_poll.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -91,8 +93,13 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
+ struct sock *save_sk = skb->sk;
+ unsigned int pkt_len;
+
+ skb->sk = sk;
+ pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
+ skb->sk = save_sk;
}
rcu_read_unlock();
@@ -348,7 +355,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
* @new_prog: buffer where converted program will be stored
* @new_len: pointer to store length of converted program
*
- * Remap 'sock_filter' style BPF instruction set to 'sock_filter_ext' style.
+ * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
+ * style extended BPF (eBPF).
* Conversion workflow:
*
* 1) First pass for calculating the new program length:
@@ -928,7 +936,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
*/
static void sk_filter_release(struct sk_filter *fp)
{
- if (atomic_dec_and_test(&fp->refcnt))
+ if (refcount_dec_and_test(&fp->refcnt))
call_rcu(&fp->rcu, sk_filter_release_rcu);
}
@@ -943,20 +951,27 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
/* try to charge the socket memory if there is space available
* return true on success
*/
-bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
u32 filter_size = bpf_prog_size(fp->prog->len);
/* same check as in sock_kmalloc() */
if (filter_size <= sysctl_optmem_max &&
atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
- atomic_inc(&fp->refcnt);
atomic_add(filter_size, &sk->sk_omem_alloc);
return true;
}
return false;
}
+bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+{
+ bool ret = __sk_filter_charge(sk, fp);
+ if (ret)
+ refcount_inc(&fp->refcnt);
+ return ret;
+}
+
static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
struct sock_filter *old_prog;
@@ -1179,12 +1194,12 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
return -ENOMEM;
fp->prog = prog;
- atomic_set(&fp->refcnt, 0);
- if (!sk_filter_charge(sk, fp)) {
+ if (!__sk_filter_charge(sk, fp)) {
kfree(fp);
return -ENOMEM;
}
+ refcount_set(&fp->refcnt, 1);
old_fp = rcu_dereference_protected(sk->sk_filter,
lockdep_sock_is_held(sk));
@@ -2599,6 +2614,36 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
+{
+ return skb->sk ? sock_gen_cookie(skb->sk) : 0;
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
+ .func = bpf_get_socket_cookie,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
+{
+ struct sock *sk = sk_to_full_sk(skb->sk);
+ kuid_t kuid;
+
+ if (!sk || !sk_fullsock(sk))
+ return overflowuid;
+ kuid = sock_net_uid(sock_net(sk), sk);
+ return from_kuid_munged(sock_net(sk)->user_ns, kuid);
+}
+
+static const struct bpf_func_proto bpf_get_socket_uid_proto = {
+ .func = bpf_get_socket_uid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -2633,6 +2678,10 @@ sk_filter_func_proto(enum bpf_func_id func_id)
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_proto;
+ case BPF_FUNC_get_socket_uid:
+ return &bpf_get_socket_uid_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -2692,6 +2741,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_proto;
+ case BPF_FUNC_get_socket_uid:
+ return &bpf_get_socket_uid_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -2715,12 +2768,7 @@ xdp_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
cg_skb_func_proto(enum bpf_func_id func_id)
{
- switch (func_id) {
- case BPF_FUNC_skb_load_bytes:
- return &bpf_skb_load_bytes_proto;
- default:
- return bpf_base_func_proto(func_id);
- }
+ return sk_filter_func_proto(func_id);
}
static const struct bpf_func_proto *
@@ -3156,6 +3204,19 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
break;
+
+ case offsetof(struct __sk_buff, napi_id):
+#if defined(CONFIG_NET_RX_BUSY_POLL)
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, napi_id));
+ *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
+ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
+#else
+ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
+#endif
+ break;
}
return insn - insn_buf;
@@ -3252,111 +3313,55 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
-static const struct bpf_verifier_ops sk_filter_ops = {
+const struct bpf_verifier_ops sk_filter_prog_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
-static const struct bpf_verifier_ops tc_cls_act_ops = {
+const struct bpf_verifier_ops tc_cls_act_prog_ops = {
.get_func_proto = tc_cls_act_func_proto,
.is_valid_access = tc_cls_act_is_valid_access,
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+ .test_run = bpf_prog_test_run_skb,
};
-static const struct bpf_verifier_ops xdp_ops = {
+const struct bpf_verifier_ops xdp_prog_ops = {
.get_func_proto = xdp_func_proto,
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
+ .test_run = bpf_prog_test_run_xdp,
};
-static const struct bpf_verifier_ops cg_skb_ops = {
+const struct bpf_verifier_ops cg_skb_prog_ops = {
.get_func_proto = cg_skb_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+ .test_run = bpf_prog_test_run_skb,
};
-static const struct bpf_verifier_ops lwt_inout_ops = {
+const struct bpf_verifier_ops lwt_inout_prog_ops = {
.get_func_proto = lwt_inout_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+ .test_run = bpf_prog_test_run_skb,
};
-static const struct bpf_verifier_ops lwt_xmit_ops = {
+const struct bpf_verifier_ops lwt_xmit_prog_ops = {
.get_func_proto = lwt_xmit_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+ .test_run = bpf_prog_test_run_skb,
};
-static const struct bpf_verifier_ops cg_sock_ops = {
+const struct bpf_verifier_ops cg_sock_prog_ops = {
.get_func_proto = bpf_base_func_proto,
.is_valid_access = sock_filter_is_valid_access,
.convert_ctx_access = sock_filter_convert_ctx_access,
};
-static struct bpf_prog_type_list sk_filter_type __ro_after_init = {
- .ops = &sk_filter_ops,
- .type = BPF_PROG_TYPE_SOCKET_FILTER,
-};
-
-static struct bpf_prog_type_list sched_cls_type __ro_after_init = {
- .ops = &tc_cls_act_ops,
- .type = BPF_PROG_TYPE_SCHED_CLS,
-};
-
-static struct bpf_prog_type_list sched_act_type __ro_after_init = {
- .ops = &tc_cls_act_ops,
- .type = BPF_PROG_TYPE_SCHED_ACT,
-};
-
-static struct bpf_prog_type_list xdp_type __ro_after_init = {
- .ops = &xdp_ops,
- .type = BPF_PROG_TYPE_XDP,
-};
-
-static struct bpf_prog_type_list cg_skb_type __ro_after_init = {
- .ops = &cg_skb_ops,
- .type = BPF_PROG_TYPE_CGROUP_SKB,
-};
-
-static struct bpf_prog_type_list lwt_in_type __ro_after_init = {
- .ops = &lwt_inout_ops,
- .type = BPF_PROG_TYPE_LWT_IN,
-};
-
-static struct bpf_prog_type_list lwt_out_type __ro_after_init = {
- .ops = &lwt_inout_ops,
- .type = BPF_PROG_TYPE_LWT_OUT,
-};
-
-static struct bpf_prog_type_list lwt_xmit_type __ro_after_init = {
- .ops = &lwt_xmit_ops,
- .type = BPF_PROG_TYPE_LWT_XMIT,
-};
-
-static struct bpf_prog_type_list cg_sock_type __ro_after_init = {
- .ops = &cg_sock_ops,
- .type = BPF_PROG_TYPE_CGROUP_SOCK
-};
-
-static int __init register_sk_filter_ops(void)
-{
- bpf_register_prog_type(&sk_filter_type);
- bpf_register_prog_type(&sched_cls_type);
- bpf_register_prog_type(&sched_act_type);
- bpf_register_prog_type(&xdp_type);
- bpf_register_prog_type(&cg_skb_type);
- bpf_register_prog_type(&cg_sock_type);
- bpf_register_prog_type(&lwt_in_type);
- bpf_register_prog_type(&lwt_out_type);
- bpf_register_prog_type(&lwt_xmit_type);
-
- return 0;
-}
-late_initcall(register_sk_filter_ops);
-
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow.c b/net/core/flow.c
index f765c11d8df5..f7f5d1932a27 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -47,7 +47,7 @@ struct flow_flush_info {
static struct kmem_cache *flow_cachep __read_mostly;
-#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
+#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift)
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
static void flow_cache_new_hashrnd(unsigned long arg)
@@ -99,7 +99,8 @@ static void flow_cache_gc_task(struct work_struct *work)
}
static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
- int deleted, struct list_head *gc_list,
+ unsigned int deleted,
+ struct list_head *gc_list,
struct netns_xfrm *xfrm)
{
if (deleted) {
@@ -114,17 +115,18 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
static void __flow_cache_shrink(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
- int shrink_to)
+ unsigned int shrink_to)
{
struct flow_cache_entry *fle;
struct hlist_node *tmp;
LIST_HEAD(gc_list);
- int i, deleted = 0;
+ unsigned int deleted = 0;
struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
flow_cache_global);
+ unsigned int i;
for (i = 0; i < flow_cache_hash_size(fc); i++) {
- int saved = 0;
+ unsigned int saved = 0;
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
@@ -145,7 +147,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
static void flow_cache_shrink(struct flow_cache *fc,
struct flow_cache_percpu *fcp)
{
- int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
+ unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
__flow_cache_shrink(fc, fcp, shrink_to);
}
@@ -161,7 +163,7 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
static u32 flow_hash_code(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
const struct flowi *key,
- size_t keysize)
+ unsigned int keysize)
{
const u32 *k = (const u32 *) key;
const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
@@ -174,7 +176,7 @@ static u32 flow_hash_code(struct flow_cache *fc,
* important assumptions that we can here, such as alignment.
*/
static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
- size_t keysize)
+ unsigned int keysize)
{
const flow_compare_t *k1, *k1_lim, *k2;
@@ -199,7 +201,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
struct flow_cache_percpu *fcp;
struct flow_cache_entry *fle, *tfle;
struct flow_cache_object *flo;
- size_t keysize;
+ unsigned int keysize;
unsigned int hash;
local_bh_disable();
@@ -295,9 +297,10 @@ static void flow_cache_flush_tasklet(unsigned long data)
struct flow_cache_entry *fle;
struct hlist_node *tmp;
LIST_HEAD(gc_list);
- int i, deleted = 0;
+ unsigned int deleted = 0;
struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
flow_cache_global);
+ unsigned int i;
fcp = this_cpu_ptr(fc->percpu);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
@@ -327,7 +330,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
{
struct flow_cache_percpu *fcp;
- int i;
+ unsigned int i;
fcp = per_cpu_ptr(fc->percpu, cpu);
for (i = 0; i < flow_cache_hash_size(fc); i++)
@@ -402,12 +405,12 @@ void flow_cache_flush_deferred(struct net *net)
static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
{
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
- size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
+ unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
if (!fcp->hash_table) {
fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
if (!fcp->hash_table) {
- pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
+ pr_err("NET: failed to allocate flow cache sz %u\n", sz);
return -ENOMEM;
}
fcp->hash_rnd_recalc = 1;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d98d4998213d..28d94bce4df8 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -113,6 +113,235 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
+enum flow_dissect_ret {
+ FLOW_DISSECT_RET_OUT_GOOD,
+ FLOW_DISSECT_RET_OUT_BAD,
+ FLOW_DISSECT_RET_OUT_PROTO_AGAIN,
+};
+
+static enum flow_dissect_ret
+__skb_flow_dissect_mpls(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, int nhoff, int hlen)
+{
+ struct flow_dissector_key_keyid *key_keyid;
+ struct mpls_label *hdr, _hdr[2];
+ u32 entry, label;
+
+ if (!dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY) &&
+ !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
+ hlen, &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ entry = ntohl(hdr[0].entry);
+ label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) {
+ struct flow_dissector_key_mpls *key_mpls;
+
+ key_mpls = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS,
+ target_container);
+ key_mpls->mpls_label = label;
+ key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK)
+ >> MPLS_LS_TTL_SHIFT;
+ key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK)
+ >> MPLS_LS_TC_SHIFT;
+ key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK)
+ >> MPLS_LS_S_SHIFT;
+ }
+
+ if (label == MPLS_LABEL_ENTROPY) {
+ key_keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
+ target_container);
+ key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK);
+ }
+ return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
+__skb_flow_dissect_arp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, int nhoff, int hlen)
+{
+ struct flow_dissector_key_arp *key_arp;
+ struct {
+ unsigned char ar_sha[ETH_ALEN];
+ unsigned char ar_sip[4];
+ unsigned char ar_tha[ETH_ALEN];
+ unsigned char ar_tip[4];
+ } *arp_eth, _arp_eth;
+ const struct arphdr *arp;
+ struct arphdr _arp;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
+ hlen, &_arp);
+ if (!arp)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
+ arp->ar_pro != htons(ETH_P_IP) ||
+ arp->ar_hln != ETH_ALEN ||
+ arp->ar_pln != 4 ||
+ (arp->ar_op != htons(ARPOP_REPLY) &&
+ arp->ar_op != htons(ARPOP_REQUEST)))
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
+ sizeof(_arp_eth), data,
+ hlen, &_arp_eth);
+ if (!arp_eth)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ key_arp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ARP,
+ target_container);
+
+ memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip));
+ memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip));
+
+ /* Only store the lower byte of the opcode;
+ * this covers ARPOP_REPLY and ARPOP_REQUEST.
+ */
+ key_arp->op = ntohs(arp->ar_op) & 0xff;
+
+ ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
+ ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
+
+ return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
+__skb_flow_dissect_gre(const struct sk_buff *skb,
+ struct flow_dissector_key_control *key_control,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data,
+ __be16 *p_proto, int *p_nhoff, int *p_hlen,
+ unsigned int flags)
+{
+ struct flow_dissector_key_keyid *key_keyid;
+ struct gre_base_hdr *hdr, _hdr;
+ int offset = 0;
+ u16 gre_ver;
+
+ hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr),
+ data, *p_hlen, &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ /* Only look inside GRE without routing */
+ if (hdr->flags & GRE_ROUTING)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ /* Only look inside GRE for version 0 and 1 */
+ gre_ver = ntohs(hdr->flags & GRE_VERSION);
+ if (gre_ver > 1)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ *p_proto = hdr->protocol;
+ if (gre_ver) {
+ /* Version1 must be PPTP, and check the flags */
+ if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+ }
+
+ offset += sizeof(struct gre_base_hdr);
+
+ if (hdr->flags & GRE_CSUM)
+ offset += sizeof(((struct gre_full_hdr *) 0)->csum) +
+ sizeof(((struct gre_full_hdr *) 0)->reserved1);
+
+ if (hdr->flags & GRE_KEY) {
+ const __be32 *keyid;
+ __be32 _keyid;
+
+ keyid = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_keyid),
+ data, *p_hlen, &_keyid);
+ if (!keyid)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID)) {
+ key_keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID,
+ target_container);
+ if (gre_ver == 0)
+ key_keyid->keyid = *keyid;
+ else
+ key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
+ }
+ offset += sizeof(((struct gre_full_hdr *) 0)->key);
+ }
+
+ if (hdr->flags & GRE_SEQ)
+ offset += sizeof(((struct pptp_gre_header *) 0)->seq);
+
+ if (gre_ver == 0) {
+ if (*p_proto == htons(ETH_P_TEB)) {
+ const struct ethhdr *eth;
+ struct ethhdr _eth;
+
+ eth = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_eth),
+ data, *p_hlen, &_eth);
+ if (!eth)
+ return FLOW_DISSECT_RET_OUT_BAD;
+ *p_proto = eth->h_proto;
+ offset += sizeof(*eth);
+
+ /* Cap headers that we access via pointers at the
+ * end of the Ethernet header as our maximum alignment
+ * at that point is only 2 bytes.
+ */
+ if (NET_IP_ALIGN)
+ *p_hlen = *p_nhoff + offset;
+ }
+ } else { /* version 1, must be PPTP */
+ u8 _ppp_hdr[PPP_HDRLEN];
+ u8 *ppp_hdr;
+
+ if (hdr->flags & GRE_ACK)
+ offset += sizeof(((struct pptp_gre_header *) 0)->ack);
+
+ ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_ppp_hdr),
+ data, *p_hlen, _ppp_hdr);
+ if (!ppp_hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ switch (PPP_PROTOCOL(ppp_hdr)) {
+ case PPP_IP:
+ *p_proto = htons(ETH_P_IP);
+ break;
+ case PPP_IPV6:
+ *p_proto = htons(ETH_P_IPV6);
+ break;
+ default:
+ /* Could probably catch some more like MPLS */
+ break;
+ }
+
+ offset += PPP_HDRLEN;
+ }
+
+ *p_nhoff += offset;
+ key_control->flags |= FLOW_DIS_ENCAPSULATION;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
+}
+
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -138,12 +367,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
- struct flow_dissector_key_arp *key_arp;
struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
- struct flow_dissector_key_keyid *key_keyid;
bool skip_vlan = false;
u8 ip_proto = 0;
bool ret;
@@ -181,7 +408,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
}
-again:
+proto_again:
switch (proto) {
case htons(ETH_P_IP): {
const struct iphdr *iph;
@@ -284,7 +511,7 @@ ipv6:
proto = vlan->h_vlan_encapsulated_proto;
nhoff += sizeof(*vlan);
if (skip_vlan)
- goto again;
+ goto proto_again;
}
skip_vlan = true;
@@ -307,7 +534,7 @@ ipv6:
}
}
- goto again;
+ goto proto_again;
}
case htons(ETH_P_PPP_SES): {
struct {
@@ -349,31 +576,17 @@ ipv6:
}
case htons(ETH_P_MPLS_UC):
- case htons(ETH_P_MPLS_MC): {
- struct mpls_label *hdr, _hdr[2];
+ case htons(ETH_P_MPLS_MC):
mpls:
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
- hlen, &_hdr);
- if (!hdr)
- goto out_bad;
-
- if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
- MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
- key_keyid = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
- target_container);
- key_keyid->keyid = hdr[1].entry &
- htonl(MPLS_LS_LABEL_MASK);
- }
-
+ switch (__skb_flow_dissect_mpls(skb, flow_dissector,
+ target_container, data,
+ nhoff, hlen)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
+ default:
+ goto out_bad;
}
-
- goto out_good;
- }
-
case htons(ETH_P_FCOE):
if ((hlen - nhoff) < FCOE_HEADER_LEN)
goto out_bad;
@@ -382,177 +595,33 @@ mpls:
goto out_good;
case htons(ETH_P_ARP):
- case htons(ETH_P_RARP): {
- struct {
- unsigned char ar_sha[ETH_ALEN];
- unsigned char ar_sip[4];
- unsigned char ar_tha[ETH_ALEN];
- unsigned char ar_tip[4];
- } *arp_eth, _arp_eth;
- const struct arphdr *arp;
- struct arphdr _arp;
-
- arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
- hlen, &_arp);
- if (!arp)
- goto out_bad;
-
- if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
- arp->ar_pro != htons(ETH_P_IP) ||
- arp->ar_hln != ETH_ALEN ||
- arp->ar_pln != 4 ||
- (arp->ar_op != htons(ARPOP_REPLY) &&
- arp->ar_op != htons(ARPOP_REQUEST)))
- goto out_bad;
-
- arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
- sizeof(_arp_eth), data,
- hlen,
- &_arp_eth);
- if (!arp_eth)
+ case htons(ETH_P_RARP):
+ switch (__skb_flow_dissect_arp(skb, flow_dissector,
+ target_container, data,
+ nhoff, hlen)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
+ default:
goto out_bad;
-
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_ARP)) {
-
- key_arp = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_ARP,
- target_container);
-
- memcpy(&key_arp->sip, arp_eth->ar_sip,
- sizeof(key_arp->sip));
- memcpy(&key_arp->tip, arp_eth->ar_tip,
- sizeof(key_arp->tip));
-
- /* Only store the lower byte of the opcode;
- * this covers ARPOP_REPLY and ARPOP_REQUEST.
- */
- key_arp->op = ntohs(arp->ar_op) & 0xff;
-
- ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
- ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
}
-
- goto out_good;
- }
-
default:
goto out_bad;
}
ip_proto_again:
switch (ip_proto) {
- case IPPROTO_GRE: {
- struct gre_base_hdr *hdr, _hdr;
- u16 gre_ver;
- int offset = 0;
-
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
- if (!hdr)
+ case IPPROTO_GRE:
+ switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector,
+ target_container, data,
+ &proto, &nhoff, &hlen, flags)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
goto out_bad;
-
- /* Only look inside GRE without routing */
- if (hdr->flags & GRE_ROUTING)
- break;
-
- /* Only look inside GRE for version 0 and 1 */
- gre_ver = ntohs(hdr->flags & GRE_VERSION);
- if (gre_ver > 1)
- break;
-
- proto = hdr->protocol;
- if (gre_ver) {
- /* Version1 must be PPTP, and check the flags */
- if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
- break;
- }
-
- offset += sizeof(struct gre_base_hdr);
-
- if (hdr->flags & GRE_CSUM)
- offset += sizeof(((struct gre_full_hdr *)0)->csum) +
- sizeof(((struct gre_full_hdr *)0)->reserved1);
-
- if (hdr->flags & GRE_KEY) {
- const __be32 *keyid;
- __be32 _keyid;
-
- keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
- data, hlen, &_keyid);
- if (!keyid)
- goto out_bad;
-
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_GRE_KEYID)) {
- key_keyid = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_GRE_KEYID,
- target_container);
- if (gre_ver == 0)
- key_keyid->keyid = *keyid;
- else
- key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
- }
- offset += sizeof(((struct gre_full_hdr *)0)->key);
- }
-
- if (hdr->flags & GRE_SEQ)
- offset += sizeof(((struct pptp_gre_header *)0)->seq);
-
- if (gre_ver == 0) {
- if (proto == htons(ETH_P_TEB)) {
- const struct ethhdr *eth;
- struct ethhdr _eth;
-
- eth = __skb_header_pointer(skb, nhoff + offset,
- sizeof(_eth),
- data, hlen, &_eth);
- if (!eth)
- goto out_bad;
- proto = eth->h_proto;
- offset += sizeof(*eth);
-
- /* Cap headers that we access via pointers at the
- * end of the Ethernet header as our maximum alignment
- * at that point is only 2 bytes.
- */
- if (NET_IP_ALIGN)
- hlen = (nhoff + offset);
- }
- } else { /* version 1, must be PPTP */
- u8 _ppp_hdr[PPP_HDRLEN];
- u8 *ppp_hdr;
-
- if (hdr->flags & GRE_ACK)
- offset += sizeof(((struct pptp_gre_header *)0)->ack);
-
- ppp_hdr = __skb_header_pointer(skb, nhoff + offset,
- sizeof(_ppp_hdr),
- data, hlen, _ppp_hdr);
- if (!ppp_hdr)
- goto out_bad;
-
- switch (PPP_PROTOCOL(ppp_hdr)) {
- case PPP_IP:
- proto = htons(ETH_P_IP);
- break;
- case PPP_IPV6:
- proto = htons(ETH_P_IPV6);
- break;
- default:
- /* Could probably catch some more like MPLS */
- break;
- }
-
- offset += PPP_HDRLEN;
+ case FLOW_DISSECT_RET_OUT_PROTO_AGAIN:
+ goto proto_again;
}
-
- nhoff += offset;
- key_control->flags |= FLOW_DIS_ENCAPSULATION;
- if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
- goto out_good;
-
- goto again;
- }
case NEXTHDR_HOP:
case NEXTHDR_ROUTING:
case NEXTHDR_DEST: {
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index c98bbfbd26b8..814e58a3ce8b 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -13,7 +13,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
struct net_device *dev = skb->dev;
struct gro_cell *cell;
- if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO))
+ if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev))
return netif_rx(skb);
cell = this_cpu_ptr(gcells->cells);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 0cfe7b0216c3..b3bc0a31af9f 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -209,7 +209,8 @@ static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,
int ret;
u32 fd;
- ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy);
+ ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy,
+ NULL);
if (ret < 0)
return ret;
@@ -249,7 +250,7 @@ static int bpf_build_state(struct nlattr *nla,
if (family != AF_INET && family != AF_INET6)
return -EAFNOSUPPORT;
- ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy);
+ ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, NULL);
if (ret < 0)
return ret;
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 6df9f8fabf0c..5cbed3816229 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -162,7 +162,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
struct rtnexthop *rtnh = (struct rtnexthop *)attr;
struct nlattr *nla_entype;
struct nlattr *attrs;
- struct nlattr *nla;
u16 encap_type;
int attrlen;
@@ -170,7 +169,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
attrs = rtnh_attrs(rtnh);
- nla = nla_find(attrs, attrlen, RTA_ENCAP);
nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
if (nla_entype) {
@@ -216,6 +214,8 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
ret = -EOPNOTSUPP;
nest = nla_nest_start(skb, RTA_ENCAP);
+ if (!nest)
+ goto nla_put_failure;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
if (likely(ops && ops->fill_encap))
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4526cbd7e28a..58b0bcc125b5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -52,8 +52,9 @@ do { \
#define PNEIGH_HASHMASK 0xF
static void neigh_timer_handler(unsigned long arg);
-static void __neigh_notify(struct neighbour *n, int type, int flags);
-static void neigh_update_notify(struct neighbour *neigh);
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+ u32 pid);
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
#ifdef CONFIG_PROC_FS
@@ -99,7 +100,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
if (neigh->parms->neigh_cleanup)
neigh->parms->neigh_cleanup(neigh);
- __neigh_notify(neigh, RTM_DELNEIGH, 0);
+ __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
neigh_release(neigh);
}
@@ -949,7 +950,7 @@ out:
}
if (notify)
- neigh_update_notify(neigh);
+ neigh_update_notify(neigh, 0);
neigh_release(neigh);
}
@@ -1073,7 +1074,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
*/
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
- u32 flags)
+ u32 flags, u32 nlmsg_pid)
{
u8 old;
int err;
@@ -1230,7 +1231,7 @@ out:
write_unlock_bh(&neigh->lock);
if (notify)
- neigh_update_notify(neigh);
+ neigh_update_notify(neigh, nlmsg_pid);
return err;
}
@@ -1261,7 +1262,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
lladdr || !dev->addr_len);
if (neigh)
neigh_update(neigh, lladdr, NUD_STALE,
- NEIGH_UPDATE_F_OVERRIDE);
+ NEIGH_UPDATE_F_OVERRIDE, 0);
return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
@@ -1589,7 +1590,8 @@ static struct neigh_table *neigh_find_table(int family)
return tbl;
}
-static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -1639,14 +1641,16 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN,
+ NETLINK_CB(skb).portid);
neigh_release(neigh);
out:
return err;
}
-static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
struct net *net = sock_net(skb->sk);
@@ -1659,7 +1663,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
int err;
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
if (err < 0)
goto out;
@@ -1730,7 +1734,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
neigh_event_send(neigh, NULL);
err = 0;
} else
- err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+ err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+ NETLINK_CB(skb).portid);
neigh_release(neigh);
out:
@@ -1933,7 +1938,8 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
[NDTPA_LOCKTIME] = { .type = NLA_U64 },
};
-static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct neigh_table *tbl;
@@ -1943,7 +1949,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
int err, tidx;
err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
- nl_neightbl_policy);
+ nl_neightbl_policy, extack);
if (err < 0)
goto errout;
@@ -1981,7 +1987,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
int i, ifindex = 0;
err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
- nl_ntbl_parm_policy);
+ nl_ntbl_parm_policy, extack);
if (err < 0)
goto errout_tbl_lock;
@@ -2230,10 +2236,10 @@ nla_put_failure:
return -EMSGSIZE;
}
-static void neigh_update_notify(struct neighbour *neigh)
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
- __neigh_notify(neigh, RTM_NEWNEIGH, 0);
+ __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
static bool neigh_master_filtered(struct net_device *dev, int master_idx)
@@ -2272,7 +2278,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
unsigned int flags = NLM_F_MULTI;
int err;
- err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
if (!err) {
if (tb[NDA_IFINDEX])
filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
@@ -2831,7 +2837,8 @@ static inline size_t neigh_nlmsg_size(void)
+ nla_total_size(4); /* NDA_PROBES */
}
-static void __neigh_notify(struct neighbour *n, int type, int flags)
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+ u32 pid)
{
struct net *net = dev_net(n->dev);
struct sk_buff *skb;
@@ -2841,7 +2848,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
if (skb == NULL)
goto errout;
- err = neigh_fill_info(skb, n, 0, 0, type, flags);
+ err = neigh_fill_info(skb, n, pid, 0, type, flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -2857,7 +2864,7 @@ errout:
void neigh_app_ns(struct neighbour *n)
{
- __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
+ __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 652468ff65b7..c1d8aed8e5a8 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -571,7 +571,8 @@ static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
[NETNSA_FD] = { .type = NLA_U32 },
};
-static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
@@ -579,7 +580,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
int nsid, err;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
- rtnl_net_policy);
+ rtnl_net_policy, extack);
if (err < 0)
return err;
if (!tb[NETNSA_NSID])
@@ -644,7 +645,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
@@ -653,7 +655,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
int err, id;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
- rtnl_net_policy);
+ rtnl_net_policy, extack);
if (err < 0)
return err;
if (tb[NETNSA_PID])
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 0f9275ee5595..1c4810919a0a 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -11,6 +11,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/module.h>
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c4e84c558240..9031a6c8bfa7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -896,15 +896,13 @@ static size_t rtnl_port_size(const struct net_device *dev,
return port_self_size;
}
-static size_t rtnl_xdp_size(const struct net_device *dev)
+static size_t rtnl_xdp_size(void)
{
size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */
- nla_total_size(1); /* XDP_ATTACHED */
+ nla_total_size(1) + /* XDP_ATTACHED */
+ nla_total_size(4); /* XDP_FLAGS */
- if (!dev->netdev_ops->ndo_xdp)
- return 0;
- else
- return xdp_size;
+ return xdp_size;
}
static noinline size_t if_nlmsg_size(const struct net_device *dev,
@@ -943,7 +941,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+ nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
- + rtnl_xdp_size(dev) /* IFLA_XDP */
+ + rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(1); /* IFLA_PROTO_DOWN */
}
@@ -1251,23 +1249,35 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
{
- struct netdev_xdp xdp_op = {};
struct nlattr *xdp;
+ u32 xdp_flags = 0;
+ u8 val = 0;
int err;
- if (!dev->netdev_ops->ndo_xdp)
- return 0;
xdp = nla_nest_start(skb, IFLA_XDP);
if (!xdp)
return -EMSGSIZE;
- xdp_op.command = XDP_QUERY_PROG;
- err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
- if (err)
- goto err_cancel;
- err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached);
+ if (rcu_access_pointer(dev->xdp_prog)) {
+ xdp_flags = XDP_FLAGS_SKB_MODE;
+ val = 1;
+ } else if (dev->netdev_ops->ndo_xdp) {
+ struct netdev_xdp xdp_op = {};
+
+ xdp_op.command = XDP_QUERY_PROG;
+ err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
+ if (err)
+ goto err_cancel;
+ val = xdp_op.prog_attached;
+ }
+ err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val);
if (err)
goto err_cancel;
+ if (xdp_flags) {
+ err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags);
+ if (err)
+ goto err_cancel;
+ }
nla_nest_end(skb, xdp);
return 0;
@@ -1515,7 +1525,8 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla
const struct rtnl_link_ops *ops = NULL;
struct nlattr *linfo[IFLA_INFO_MAX + 1];
- if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, ifla_info_policy) < 0)
+ if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla,
+ ifla_info_policy, NULL) < 0)
return NULL;
if (linfo[IFLA_INFO_KIND]) {
@@ -1592,8 +1603,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
-
+ if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
+ ifla_policy, NULL) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1640,9 +1651,10 @@ out:
return skb->len;
}
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len)
+int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
+ struct netlink_ext_ack *exterr)
{
- return nla_parse(tb, IFLA_MAX, head, len, ifla_policy);
+ return nla_parse(tb, IFLA_MAX, head, len, ifla_policy, exterr);
}
EXPORT_SYMBOL(rtnl_nla_parse_ifla);
@@ -2078,7 +2090,7 @@ static int do_setlink(const struct sk_buff *skb,
goto errout;
}
err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr,
- ifla_vf_policy);
+ ifla_vf_policy, NULL);
if (err < 0)
goto errout;
err = do_setvfinfo(dev, vfinfo);
@@ -2106,7 +2118,7 @@ static int do_setlink(const struct sk_buff *skb,
goto errout;
}
err = nla_parse_nested(port, IFLA_PORT_MAX, attr,
- ifla_port_policy);
+ ifla_port_policy, NULL);
if (err < 0)
goto errout;
if (!port[IFLA_PORT_VF]) {
@@ -2126,7 +2138,8 @@ static int do_setlink(const struct sk_buff *skb,
struct nlattr *port[IFLA_PORT_MAX+1];
err = nla_parse_nested(port, IFLA_PORT_MAX,
- tb[IFLA_PORT_SELF], ifla_port_policy);
+ tb[IFLA_PORT_SELF], ifla_port_policy,
+ NULL);
if (err < 0)
goto errout;
@@ -2170,7 +2183,7 @@ static int do_setlink(const struct sk_buff *skb,
u32 xdp_flags = 0;
err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP],
- ifla_xdp_policy);
+ ifla_xdp_policy, NULL);
if (err < 0)
goto errout;
@@ -2210,7 +2223,8 @@ errout:
return err;
}
-static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -2219,7 +2233,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[IFLA_MAX+1];
char ifname[IFNAMSIZ];
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy,
+ extack);
if (err < 0)
goto errout;
@@ -2303,7 +2318,8 @@ int rtnl_delete_link(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(rtnl_delete_link);
-static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_device *dev;
@@ -2312,7 +2328,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[IFLA_MAX+1];
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
@@ -2423,7 +2439,8 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
return 0;
}
-static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
@@ -2441,7 +2458,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
#ifdef CONFIG_MODULES
replay:
#endif
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
@@ -2472,7 +2489,8 @@ replay:
if (tb[IFLA_LINKINFO]) {
err = nla_parse_nested(linkinfo, IFLA_INFO_MAX,
- tb[IFLA_LINKINFO], ifla_info_policy);
+ tb[IFLA_LINKINFO], ifla_info_policy,
+ NULL);
if (err < 0)
return err;
} else
@@ -2497,7 +2515,7 @@ replay:
if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
err = nla_parse_nested(attr, ops->maxtype,
linkinfo[IFLA_INFO_DATA],
- ops->policy);
+ ops->policy, NULL);
if (err < 0)
return err;
data = attr;
@@ -2515,7 +2533,8 @@ replay:
err = nla_parse_nested(slave_attr,
m_ops->slave_maxtype,
linkinfo[IFLA_INFO_SLAVE_DATA],
- m_ops->slave_policy);
+ m_ops->slave_policy,
+ NULL);
if (err < 0)
return err;
slave_data = slave_attr;
@@ -2673,7 +2692,8 @@ out_unregister:
}
}
-static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh)
+static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -2684,7 +2704,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh)
int err;
u32 ext_filter_mask = 0;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
@@ -2734,7 +2754,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
+ if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
}
@@ -2955,7 +2975,8 @@ static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid)
return 0;
}
-static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -2965,7 +2986,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
u16 vid;
int err;
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -3055,7 +3076,8 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_del);
-static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -3068,7 +3090,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -3203,8 +3225,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
int err = 0;
int fidx = 0;
- if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) == 0) {
+ if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
+ IFLA_MAX, ifla_policy, NULL) == 0) {
if (tb[IFLA_MASTER])
br_idx = nla_get_u32(tb[IFLA_MASTER]);
}
@@ -3498,7 +3520,8 @@ errout:
return err;
}
-static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -3572,7 +3595,8 @@ out:
return err;
}
-static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -3940,7 +3964,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
return size;
}
-static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_device *dev = NULL;
@@ -4046,7 +4071,8 @@ out:
/* Process one rtnetlink message. */
-static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
rtnl_doit_func doit;
@@ -4101,7 +4127,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (doit == NULL)
return -EOPNOTSUPP;
- return doit(skb, nlh);
+ return doit(skb, nlh, extack);
}
static void rtnetlink_rcv(struct sk_buff *skb)
@@ -4116,22 +4142,16 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
switch (event) {
- case NETDEV_UP:
- case NETDEV_DOWN:
- case NETDEV_PRE_UP:
- case NETDEV_POST_INIT:
- case NETDEV_REGISTER:
- case NETDEV_CHANGE:
- case NETDEV_PRE_TYPE_CHANGE:
- case NETDEV_GOING_DOWN:
- case NETDEV_UNREGISTER:
- case NETDEV_UNREGISTER_FINAL:
- case NETDEV_RELEASE:
- case NETDEV_JOIN:
- case NETDEV_BONDING_INFO:
+ case NETDEV_REBOOT:
+ case NETDEV_CHANGENAME:
+ case NETDEV_FEAT_CHANGE:
+ case NETDEV_BONDING_FAILOVER:
+ case NETDEV_NOTIFY_PEERS:
+ case NETDEV_RESEND_IGMP:
+ case NETDEV_CHANGEINFODATA:
+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
break;
default:
- rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
break;
}
return NOTIFY_DONE;
@@ -4185,6 +4205,7 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL);
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index d28da7d363f1..6bd2f8fb0476 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -64,8 +64,8 @@ static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
&ts_secret);
}
-u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
- __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
+ __be16 sport, __be16 dport, u32 *tsoff)
{
const struct {
struct in6_addr saddr;
@@ -85,7 +85,7 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
*tsoff = secure_tcpv6_ts_off(saddr, daddr);
return seq_scale(hash);
}
-EXPORT_SYMBOL(secure_tcpv6_sequence_number);
+EXPORT_SYMBOL(secure_tcpv6_seq_and_tsoff);
u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
@@ -116,14 +116,13 @@ static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
&ts_secret);
}
-/* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
+/* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
* but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
* it would be easy enough to have the former function use siphash_4u32, passing
* the arguments as separate u32.
*/
-
-u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport, u32 *tsoff)
{
u64 hash;
net_secret_init();
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f86bf69cfb8d..58604c1889bd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3100,7 +3100,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
skb_walk_frags(head_skb, iter) {
if (frag_len != iter->len && iter->next)
goto normal;
- if (skb_headlen(iter))
+ if (skb_headlen(iter) && !iter->head_frag)
goto normal;
len -= iter->len;
diff --git a/net/core/sock.c b/net/core/sock.c
index 2c4f574168fb..a06bb7a2a689 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -247,12 +247,66 @@ static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
_sock_locks("k-clock-")
};
+static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
+ "rlock-AF_UNSPEC", "rlock-AF_UNIX" , "rlock-AF_INET" ,
+ "rlock-AF_AX25" , "rlock-AF_IPX" , "rlock-AF_APPLETALK",
+ "rlock-AF_NETROM", "rlock-AF_BRIDGE" , "rlock-AF_ATMPVC" ,
+ "rlock-AF_X25" , "rlock-AF_INET6" , "rlock-AF_ROSE" ,
+ "rlock-AF_DECnet", "rlock-AF_NETBEUI" , "rlock-AF_SECURITY" ,
+ "rlock-AF_KEY" , "rlock-AF_NETLINK" , "rlock-AF_PACKET" ,
+ "rlock-AF_ASH" , "rlock-AF_ECONET" , "rlock-AF_ATMSVC" ,
+ "rlock-AF_RDS" , "rlock-AF_SNA" , "rlock-AF_IRDA" ,
+ "rlock-AF_PPPOX" , "rlock-AF_WANPIPE" , "rlock-AF_LLC" ,
+ "rlock-27" , "rlock-28" , "rlock-AF_CAN" ,
+ "rlock-AF_TIPC" , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV" ,
+ "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" ,
+ "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" ,
+ "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" ,
+ "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_MAX"
+};
+static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
+ "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" ,
+ "wlock-AF_AX25" , "wlock-AF_IPX" , "wlock-AF_APPLETALK",
+ "wlock-AF_NETROM", "wlock-AF_BRIDGE" , "wlock-AF_ATMPVC" ,
+ "wlock-AF_X25" , "wlock-AF_INET6" , "wlock-AF_ROSE" ,
+ "wlock-AF_DECnet", "wlock-AF_NETBEUI" , "wlock-AF_SECURITY" ,
+ "wlock-AF_KEY" , "wlock-AF_NETLINK" , "wlock-AF_PACKET" ,
+ "wlock-AF_ASH" , "wlock-AF_ECONET" , "wlock-AF_ATMSVC" ,
+ "wlock-AF_RDS" , "wlock-AF_SNA" , "wlock-AF_IRDA" ,
+ "wlock-AF_PPPOX" , "wlock-AF_WANPIPE" , "wlock-AF_LLC" ,
+ "wlock-27" , "wlock-28" , "wlock-AF_CAN" ,
+ "wlock-AF_TIPC" , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV" ,
+ "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" ,
+ "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" ,
+ "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" ,
+ "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_MAX"
+};
+static const char *const af_family_elock_key_strings[AF_MAX+1] = {
+ "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" ,
+ "elock-AF_AX25" , "elock-AF_IPX" , "elock-AF_APPLETALK",
+ "elock-AF_NETROM", "elock-AF_BRIDGE" , "elock-AF_ATMPVC" ,
+ "elock-AF_X25" , "elock-AF_INET6" , "elock-AF_ROSE" ,
+ "elock-AF_DECnet", "elock-AF_NETBEUI" , "elock-AF_SECURITY" ,
+ "elock-AF_KEY" , "elock-AF_NETLINK" , "elock-AF_PACKET" ,
+ "elock-AF_ASH" , "elock-AF_ECONET" , "elock-AF_ATMSVC" ,
+ "elock-AF_RDS" , "elock-AF_SNA" , "elock-AF_IRDA" ,
+ "elock-AF_PPPOX" , "elock-AF_WANPIPE" , "elock-AF_LLC" ,
+ "elock-27" , "elock-28" , "elock-AF_CAN" ,
+ "elock-AF_TIPC" , "elock-AF_BLUETOOTH", "elock-AF_IUCV" ,
+ "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" ,
+ "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" ,
+ "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" ,
+ "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_MAX"
+};
/*
- * sk_callback_lock locking rules are per-address-family,
+ * sk_callback_lock and sk queues locking rules are per-address-family,
* so split the lock classes by using a per-AF key:
*/
static struct lock_class_key af_callback_keys[AF_MAX];
+static struct lock_class_key af_rlock_keys[AF_MAX];
+static struct lock_class_key af_wlock_keys[AF_MAX];
+static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];
/* Take into consideration the size of the struct sk_buff overhead in the
@@ -1029,6 +1083,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
union {
int val;
+ u64 val64;
struct linger ling;
struct timeval tm;
} v;
@@ -1259,6 +1314,40 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_incoming_cpu;
break;
+ case SO_MEMINFO:
+ {
+ u32 meminfo[SK_MEMINFO_VARS];
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ sk_get_meminfo(sk, meminfo);
+
+ len = min_t(unsigned int, len, sizeof(meminfo));
+ if (copy_to_user(optval, &meminfo, len))
+ return -EFAULT;
+
+ goto lenout;
+ }
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ case SO_INCOMING_NAPI_ID:
+ v.val = READ_ONCE(sk->sk_napi_id);
+
+ /* aggregate non-NAPI IDs down to 0 */
+ if (v.val < MIN_NAPI_ID)
+ v.val = 0;
+
+ break;
+#endif
+
+ case SO_COOKIE:
+ lv = sizeof(u64);
+ if (len < lv)
+ return -EINVAL;
+ v.val64 = sock_gen_cookie(sk);
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1483,6 +1572,27 @@ void sk_free(struct sock *sk)
}
EXPORT_SYMBOL(sk_free);
+static void sk_init_common(struct sock *sk)
+{
+ skb_queue_head_init(&sk->sk_receive_queue);
+ skb_queue_head_init(&sk->sk_write_queue);
+ skb_queue_head_init(&sk->sk_error_queue);
+
+ rwlock_init(&sk->sk_callback_lock);
+ lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
+ af_rlock_keys + sk->sk_family,
+ af_family_rlock_key_strings[sk->sk_family]);
+ lockdep_set_class_and_name(&sk->sk_write_queue.lock,
+ af_wlock_keys + sk->sk_family,
+ af_family_wlock_key_strings[sk->sk_family]);
+ lockdep_set_class_and_name(&sk->sk_error_queue.lock,
+ af_elock_keys + sk->sk_family,
+ af_family_elock_key_strings[sk->sk_family]);
+ lockdep_set_class_and_name(&sk->sk_callback_lock,
+ af_callback_keys + sk->sk_family,
+ af_family_clock_key_strings[sk->sk_family]);
+}
+
/**
* sk_clone_lock - clone a socket, and lock its clone
* @sk: the socket to clone
@@ -1516,13 +1626,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
atomic_set(&newsk->sk_wmem_alloc, 1);
atomic_set(&newsk->sk_omem_alloc, 0);
- skb_queue_head_init(&newsk->sk_receive_queue);
- skb_queue_head_init(&newsk->sk_write_queue);
-
- rwlock_init(&newsk->sk_callback_lock);
- lockdep_set_class_and_name(&newsk->sk_callback_lock,
- af_callback_keys + newsk->sk_family,
- af_family_clock_key_strings[newsk->sk_family]);
+ sk_init_common(newsk);
newsk->sk_dst_cache = NULL;
newsk->sk_dst_pending_confirm = 0;
@@ -1533,7 +1637,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
sock_reset_flag(newsk, SOCK_DONE);
- skb_queue_head_init(&newsk->sk_error_queue);
filter = rcu_dereference_protected(newsk->sk_filter, 1);
if (filter != NULL)
@@ -2466,10 +2569,7 @@ EXPORT_SYMBOL(sk_stop_timer);
void sock_init_data(struct socket *sock, struct sock *sk)
{
- skb_queue_head_init(&sk->sk_receive_queue);
- skb_queue_head_init(&sk->sk_write_queue);
- skb_queue_head_init(&sk->sk_error_queue);
-
+ sk_init_common(sk);
sk->sk_send_head = NULL;
init_timer(&sk->sk_timer);
@@ -2521,7 +2621,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
- sk->sk_stamp = ktime_set(-1L, 0);
+ sk->sk_stamp = SK_DEFAULT_STAMP;
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
@@ -2802,6 +2902,21 @@ void sk_common_release(struct sock *sk)
}
EXPORT_SYMBOL(sk_common_release);
+void sk_get_meminfo(const struct sock *sk, u32 *mem)
+{
+ memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
+
+ mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
+ mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+ mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
+ mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+ mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+ mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+ mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+ mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+ mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+}
+
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR 64 /* should be enough for the first time */
struct prot_inuse {
@@ -3142,3 +3257,14 @@ static int __init proto_init(void)
subsys_initcall(proto_init);
#endif /* PROC_FS */
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+bool sk_busy_loop_end(void *p, unsigned long start_time)
+{
+ struct sock *sk = p;
+
+ return !skb_queue_empty(&sk->sk_receive_queue) ||
+ sk_busy_loop_timeout(sk, start_time);
+}
+EXPORT_SYMBOL(sk_busy_loop_end);
+#endif /* CONFIG_NET_RX_BUSY_POLL */
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 6b10573cc9fa..217f4e3b82f6 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -19,7 +19,7 @@ static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static struct workqueue_struct *broadcast_wq;
-static u64 sock_gen_cookie(struct sock *sk)
+u64 sock_gen_cookie(struct sock *sk)
{
while (1) {
u64 res = atomic64_read(&sk->sk_cookie);
@@ -59,15 +59,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
{
u32 mem[SK_MEMINFO_VARS];
- mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
- mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
- mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
- mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
- mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
- mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
- mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
- mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
- mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+ sk_get_meminfo(sk, mem);
return nla_put(skb, attrtype, sizeof(mem), &mem);
}
@@ -246,7 +238,8 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
}
-static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
int ret;
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 9a1a352fd1eb..eed1ebf7f29d 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -13,9 +13,9 @@
static DEFINE_SPINLOCK(reuseport_lock);
-static struct sock_reuseport *__reuseport_alloc(u16 max_socks)
+static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
- size_t size = sizeof(struct sock_reuseport) +
+ unsigned int size = sizeof(struct sock_reuseport) +
sizeof(struct sock *) * max_socks;
struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7f9cc400eca0..ea23254b2457 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -452,6 +452,14 @@ static struct ctl_table net_core_table[] = {
.extra1 = &one,
.extra2 = &max_skb_frags,
},
+ {
+ .procname = "netdev_budget_usecs",
+ .data = &netdev_budget_usecs,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ },
{ }
};
diff --git a/net/core/utils.c b/net/core/utils.c
index 6592d7bbed39..d758880c09a7 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -51,7 +51,7 @@ EXPORT_SYMBOL(net_ratelimit);
__be32 in_aton(const char *str)
{
- unsigned long l;
+ unsigned int l;
unsigned int val;
int i;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 3202d75329b5..93106120f987 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -245,8 +245,7 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
- tb[DCB_ATTR_PFC_CFG],
- dcbnl_pfc_up_nest);
+ tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL);
if (ret)
return ret;
@@ -304,7 +303,7 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested(data, DCB_CAP_ATTR_MAX, tb[DCB_ATTR_CAP],
- dcbnl_cap_nest);
+ dcbnl_cap_nest, NULL);
if (ret)
return ret;
@@ -348,7 +347,7 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS],
- dcbnl_numtcs_nest);
+ dcbnl_numtcs_nest, NULL);
if (ret)
return ret;
@@ -393,7 +392,7 @@ static int dcbnl_setnumtcs(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS],
- dcbnl_numtcs_nest);
+ dcbnl_numtcs_nest, NULL);
if (ret)
return ret;
@@ -452,7 +451,7 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlmsghdr *nlh,
return -EINVAL;
ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP],
- dcbnl_app_nest);
+ dcbnl_app_nest, NULL);
if (ret)
return ret;
@@ -520,7 +519,7 @@ static int dcbnl_setapp(struct net_device *netdev, struct nlmsghdr *nlh,
return -EINVAL;
ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP],
- dcbnl_app_nest);
+ dcbnl_app_nest, NULL);
if (ret)
return ret;
@@ -577,8 +576,8 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh,
!netdev->dcbnl_ops->getpgbwgcfgrx)
return -EOPNOTSUPP;
- ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
- tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+ ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG],
+ dcbnl_pg_nest, NULL);
if (ret)
return ret;
@@ -597,8 +596,8 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh,
data = pg_tb[DCB_PG_ATTR_TC_ALL];
else
data = pg_tb[i];
- ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
- data, dcbnl_tc_param_nest);
+ ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, data,
+ dcbnl_tc_param_nest, NULL);
if (ret)
goto err_pg;
@@ -735,8 +734,7 @@ static int dcbnl_setpfccfg(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
- tb[DCB_ATTR_PFC_CFG],
- dcbnl_pfc_up_nest);
+ tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL);
if (ret)
return ret;
@@ -791,8 +789,8 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
!netdev->dcbnl_ops->setpgbwgcfgrx)
return -EOPNOTSUPP;
- ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
- tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+ ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG],
+ dcbnl_pg_nest, NULL);
if (ret)
return ret;
@@ -801,7 +799,7 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
continue;
ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
- pg_tb[i], dcbnl_tc_param_nest);
+ pg_tb[i], dcbnl_tc_param_nest, NULL);
if (ret)
return ret;
@@ -889,8 +887,8 @@ static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlmsghdr *nlh,
!netdev->dcbnl_ops->getbcncfg)
return -EOPNOTSUPP;
- ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX,
- tb[DCB_ATTR_BCN], dcbnl_bcn_nest);
+ ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN],
+ dcbnl_bcn_nest, NULL);
if (ret)
return ret;
@@ -948,9 +946,8 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
!netdev->dcbnl_ops->setbcnrp)
return -EOPNOTSUPP;
- ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX,
- tb[DCB_ATTR_BCN],
- dcbnl_pfc_up_nest);
+ ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN],
+ dcbnl_pfc_up_nest, NULL);
if (ret)
return ret;
@@ -1424,8 +1421,8 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
if (!tb[DCB_ATTR_IEEE])
return -EINVAL;
- err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX,
- tb[DCB_ATTR_IEEE], dcbnl_ieee_policy);
+ err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE],
+ dcbnl_ieee_policy, NULL);
if (err)
return err;
@@ -1508,8 +1505,8 @@ static int dcbnl_ieee_del(struct net_device *netdev, struct nlmsghdr *nlh,
if (!tb[DCB_ATTR_IEEE])
return -EINVAL;
- err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX,
- tb[DCB_ATTR_IEEE], dcbnl_ieee_policy);
+ err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE],
+ dcbnl_ieee_policy, NULL);
if (err)
return err;
@@ -1581,8 +1578,8 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh,
if (!tb[DCB_ATTR_FEATCFG])
return -EINVAL;
- ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG],
- dcbnl_featcfg_nest);
+ ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX,
+ tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL);
if (ret)
return ret;
@@ -1625,8 +1622,8 @@ static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh,
if (!tb[DCB_ATTR_FEATCFG])
return -EINVAL;
- ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG],
- dcbnl_featcfg_nest);
+ ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX,
+ tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL);
if (ret)
goto err;
@@ -1699,7 +1696,8 @@ static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = {
[DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get },
};
-static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
@@ -1715,7 +1713,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EPERM;
ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
- dcbnl_rtnl_policy);
+ dcbnl_rtnl_policy, extack);
if (ret < 0)
return ret;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 7de5b40a5d0d..9afa2a5030b2 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -132,6 +132,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/fib_rules.h>
+#include <net/tcp.h>
#include <net/dn.h>
#include <net/dn_nsp.h>
#include <net/dn_dev.h>
@@ -1469,18 +1470,18 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
case DSO_NODELAY:
if (optlen != sizeof(int))
return -EINVAL;
- if (scp->nonagle == 2)
+ if (scp->nonagle == TCP_NAGLE_CORK)
return -EINVAL;
- scp->nonagle = (u.val == 0) ? 0 : 1;
+ scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF;
/* if (scp->nonagle == 1) { Push pending frames } */
break;
case DSO_CORK:
if (optlen != sizeof(int))
return -EINVAL;
- if (scp->nonagle == 1)
+ if (scp->nonagle == TCP_NAGLE_OFF)
return -EINVAL;
- scp->nonagle = (u.val == 0) ? 0 : 2;
+ scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK;
/* if (scp->nonagle == 0) { Push pending frames } */
break;
@@ -1608,14 +1609,14 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
case DSO_NODELAY:
if (r_len > sizeof(int))
r_len = sizeof(int);
- val = (scp->nonagle == 1);
+ val = (scp->nonagle == TCP_NAGLE_OFF);
r_data = &val;
break;
case DSO_CORK:
if (r_len > sizeof(int))
r_len = sizeof(int);
- val = (scp->nonagle == 2);
+ val = (scp->nonagle == TCP_NAGLE_CORK);
r_data = &val;
break;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 8fdd9f492b0e..9017a9a73ab5 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -565,7 +565,8 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
[IFA_FLAGS] = { .type = NLA_U32 },
};
-static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
@@ -581,7 +582,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!net_eq(net, &init_net))
goto errout;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy,
+ extack);
if (err < 0)
goto errout;
@@ -609,7 +611,8 @@ errout:
return err;
}
-static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
@@ -625,7 +628,8 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!net_eq(net, &init_net))
return -EINVAL;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy,
+ extack);
if (err < 0)
return err;
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 7af0ba6157a1..f9058ebeb635 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -501,7 +501,8 @@ static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table)
return table;
}
-static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct dn_fib_table *tb;
@@ -515,7 +516,8 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!net_eq(net, &init_net))
return -EINVAL;
- err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy);
+ err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy,
+ extack);
if (err < 0)
return err;
@@ -526,7 +528,8 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb));
}
-static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct dn_fib_table *tb;
@@ -540,7 +543,8 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!net_eq(net, &init_net))
return -EINVAL;
- err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy);
+ err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy,
+ extack);
if (err < 0)
return err;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index b1dc096d22f8..4b9518a0d248 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1640,7 +1640,8 @@ const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = {
/*
* This is called by both endnodes and routers now.
*/
-static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct rtmsg *rtm = nlmsg_data(nlh);
@@ -1654,7 +1655,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
if (!net_eq(net, &init_net))
return -EINVAL;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy,
+ extack);
if (err < 0)
return err;
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 85f2fdc360c2..c8bf5136a72b 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -96,7 +96,7 @@ static unsigned int dnrmg_hook(void *priv,
}
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err), NULL); return; } while (0)
static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
{
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 9649238eef40..81a0868edb1d 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -6,7 +6,7 @@ config HAVE_NET_DSA
config NET_DSA
tristate "Distributed Switch Architecture"
- depends on HAVE_NET_DSA
+ depends on HAVE_NET_DSA && MAY_USE_DEVLINK
select NET_SWITCHDEV
select PHYLIB
---help---
@@ -31,4 +31,10 @@ config NET_DSA_TAG_TRAILER
config NET_DSA_TAG_QCA
bool
+config NET_DSA_TAG_MTK
+ bool
+
+config NET_DSA_TAG_LAN9303
+ bool
+
endif
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 31d343796251..0b747d75e65a 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,6 +1,6 @@
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o slave.o dsa2.o switch.o
+dsa_core-y += dsa.o slave.o dsa2.o switch.o legacy.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
@@ -8,3 +8,5 @@ dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
+dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
+dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index b6d4f6a23f06..26130ae438da 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -14,15 +14,17 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <net/dsa.h>
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/of_platform.h>
#include <linux/of_net.h>
#include <linux/of_gpio.h>
+#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <linux/phy_fixed.h>
#include <linux/gpio/consumer.h>
+#include <linux/etherdevice.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
@@ -53,62 +55,15 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
#ifdef CONFIG_NET_DSA_TAG_QCA
[DSA_TAG_PROTO_QCA] = &qca_netdev_ops,
#endif
+#ifdef CONFIG_NET_DSA_TAG_MTK
+ [DSA_TAG_PROTO_MTK] = &mtk_netdev_ops,
+#endif
+#ifdef CONFIG_NET_DSA_TAG_LAN9303
+ [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
+#endif
[DSA_TAG_PROTO_NONE] = &none_ops,
};
-/* switch driver registration ***********************************************/
-static DEFINE_MUTEX(dsa_switch_drivers_mutex);
-static LIST_HEAD(dsa_switch_drivers);
-
-void register_switch_driver(struct dsa_switch_driver *drv)
-{
- mutex_lock(&dsa_switch_drivers_mutex);
- list_add_tail(&drv->list, &dsa_switch_drivers);
- mutex_unlock(&dsa_switch_drivers_mutex);
-}
-EXPORT_SYMBOL_GPL(register_switch_driver);
-
-void unregister_switch_driver(struct dsa_switch_driver *drv)
-{
- mutex_lock(&dsa_switch_drivers_mutex);
- list_del_init(&drv->list);
- mutex_unlock(&dsa_switch_drivers_mutex);
-}
-EXPORT_SYMBOL_GPL(unregister_switch_driver);
-
-static const struct dsa_switch_ops *
-dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
- const char **_name, void **priv)
-{
- const struct dsa_switch_ops *ret;
- struct list_head *list;
- const char *name;
-
- ret = NULL;
- name = NULL;
-
- mutex_lock(&dsa_switch_drivers_mutex);
- list_for_each(list, &dsa_switch_drivers) {
- const struct dsa_switch_ops *ops;
- struct dsa_switch_driver *drv;
-
- drv = list_entry(list, struct dsa_switch_driver, list);
- ops = drv->ops;
-
- name = ops->probe(parent, host_dev, sw_addr, priv);
- if (name != NULL) {
- ret = ops;
- break;
- }
- }
- mutex_unlock(&dsa_switch_drivers_mutex);
-
- *_name = name;
-
- return ret;
-}
-
-/* basic switch operations **************************************************/
int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
struct dsa_port *dport, int port)
{
@@ -140,23 +95,6 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
return 0;
}
-static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev)
-{
- struct dsa_port *dport;
- int ret, port;
-
- for (port = 0; port < ds->num_ports; port++) {
- if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
- continue;
-
- dport = &ds->ports[port];
- ret = dsa_cpu_dsa_setup(ds, dev, dport, port);
- if (ret)
- return ret;
- }
- return 0;
-}
-
const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
{
const struct dsa_device_ops *ops;
@@ -206,168 +144,6 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds)
master->ethtool_ops = ds->dst->master_orig_ethtool_ops;
}
-static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
-{
- const struct dsa_switch_ops *ops = ds->ops;
- struct dsa_switch_tree *dst = ds->dst;
- struct dsa_chip_data *cd = ds->cd;
- bool valid_name_found = false;
- int index = ds->index;
- int i, ret;
-
- /*
- * Validate supplied switch configuration.
- */
- for (i = 0; i < ds->num_ports; i++) {
- char *name;
-
- name = cd->port_names[i];
- if (name == NULL)
- continue;
-
- if (!strcmp(name, "cpu")) {
- if (dst->cpu_switch) {
- netdev_err(dst->master_netdev,
- "multiple cpu ports?!\n");
- return -EINVAL;
- }
- dst->cpu_switch = ds;
- dst->cpu_port = i;
- ds->cpu_port_mask |= 1 << i;
- } else if (!strcmp(name, "dsa")) {
- ds->dsa_port_mask |= 1 << i;
- } else {
- ds->enabled_port_mask |= 1 << i;
- }
- valid_name_found = true;
- }
-
- if (!valid_name_found && i == ds->num_ports)
- return -EINVAL;
-
- /* Make the built-in MII bus mask match the number of ports,
- * switch drivers can override this later
- */
- ds->phys_mii_mask = ds->enabled_port_mask;
-
- /*
- * If the CPU connects to this switch, set the switch tree
- * tagging protocol to the preferred tagging format of this
- * switch.
- */
- if (dst->cpu_switch == ds) {
- enum dsa_tag_protocol tag_protocol;
-
- tag_protocol = ops->get_tag_protocol(ds);
- dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
- if (IS_ERR(dst->tag_ops))
- return PTR_ERR(dst->tag_ops);
-
- dst->rcv = dst->tag_ops->rcv;
- }
-
- memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
-
- /*
- * Do basic register setup.
- */
- ret = ops->setup(ds);
- if (ret < 0)
- return ret;
-
- ret = dsa_switch_register_notifier(ds);
- if (ret)
- return ret;
-
- if (ops->set_addr) {
- ret = ops->set_addr(ds, dst->master_netdev->dev_addr);
- if (ret < 0)
- return ret;
- }
-
- if (!ds->slave_mii_bus && ops->phy_read) {
- ds->slave_mii_bus = devm_mdiobus_alloc(parent);
- if (!ds->slave_mii_bus)
- return -ENOMEM;
- dsa_slave_mii_bus_init(ds);
-
- ret = mdiobus_register(ds->slave_mii_bus);
- if (ret < 0)
- return ret;
- }
-
- /*
- * Create network devices for physical switch ports.
- */
- for (i = 0; i < ds->num_ports; i++) {
- ds->ports[i].dn = cd->port_dn[i];
-
- if (!(ds->enabled_port_mask & (1 << i)))
- continue;
-
- ret = dsa_slave_create(ds, parent, i, cd->port_names[i]);
- if (ret < 0)
- netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
- index, i, cd->port_names[i], ret);
- }
-
- /* Perform configuration of the CPU and DSA ports */
- ret = dsa_cpu_dsa_setups(ds, parent);
- if (ret < 0)
- netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n",
- index);
-
- ret = dsa_cpu_port_ethtool_setup(ds);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static struct dsa_switch *
-dsa_switch_setup(struct dsa_switch_tree *dst, int index,
- struct device *parent, struct device *host_dev)
-{
- struct dsa_chip_data *cd = dst->pd->chip + index;
- const struct dsa_switch_ops *ops;
- struct dsa_switch *ds;
- int ret;
- const char *name;
- void *priv;
-
- /*
- * Probe for switch model.
- */
- ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
- if (!ops) {
- netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
- index);
- return ERR_PTR(-EINVAL);
- }
- netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n",
- index, name);
-
-
- /*
- * Allocate and initialise switch state.
- */
- ds = dsa_switch_alloc(parent, DSA_MAX_PORTS);
- if (!ds)
- return ERR_PTR(-ENOMEM);
-
- ds->dst = dst;
- ds->index = index;
- ds->cd = cd;
- ds->ops = ops;
- ds->priv = priv;
-
- ret = dsa_switch_setup_one(ds, parent);
- if (ret)
- return ERR_PTR(ret);
-
- return ds;
-}
-
void dsa_cpu_dsa_destroy(struct dsa_port *port)
{
struct device_node *port_dn = port->dn;
@@ -376,86 +152,6 @@ void dsa_cpu_dsa_destroy(struct dsa_port *port)
of_phy_deregister_fixed_link(port_dn);
}
-static void dsa_switch_destroy(struct dsa_switch *ds)
-{
- int port;
-
- /* Destroy network devices for physical switch ports. */
- for (port = 0; port < ds->num_ports; port++) {
- if (!(ds->enabled_port_mask & (1 << port)))
- continue;
-
- if (!ds->ports[port].netdev)
- continue;
-
- dsa_slave_destroy(ds->ports[port].netdev);
- }
-
- /* Disable configuration of the CPU and DSA ports */
- for (port = 0; port < ds->num_ports; port++) {
- if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
- continue;
- dsa_cpu_dsa_destroy(&ds->ports[port]);
-
- /* Clearing a bit which is not set does no harm */
- ds->cpu_port_mask |= ~(1 << port);
- ds->dsa_port_mask |= ~(1 << port);
- }
-
- if (ds->slave_mii_bus && ds->ops->phy_read)
- mdiobus_unregister(ds->slave_mii_bus);
-
- dsa_switch_unregister_notifier(ds);
-}
-
-#ifdef CONFIG_PM_SLEEP
-int dsa_switch_suspend(struct dsa_switch *ds)
-{
- int i, ret = 0;
-
- /* Suspend slave network devices */
- for (i = 0; i < ds->num_ports; i++) {
- if (!dsa_is_port_initialized(ds, i))
- continue;
-
- ret = dsa_slave_suspend(ds->ports[i].netdev);
- if (ret)
- return ret;
- }
-
- if (ds->ops->suspend)
- ret = ds->ops->suspend(ds);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(dsa_switch_suspend);
-
-int dsa_switch_resume(struct dsa_switch *ds)
-{
- int i, ret = 0;
-
- if (ds->ops->resume)
- ret = ds->ops->resume(ds);
-
- if (ret)
- return ret;
-
- /* Resume slave network devices */
- for (i = 0; i < ds->num_ports; i++) {
- if (!dsa_is_port_initialized(ds, i))
- continue;
-
- ret = dsa_slave_resume(ds->ports[i].netdev);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(dsa_switch_resume);
-#endif
-
-/* platform driver init and cleanup *****************************************/
static int dev_is_class(struct device *dev, void *class)
{
if (dev->class != NULL && !strcmp(dev->class->name, class))
@@ -474,24 +170,6 @@ static struct device *dev_find_class(struct device *parent, char *class)
return device_find_child(parent, class, dev_is_class);
}
-struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev)
-{
- struct device *d;
-
- d = dev_find_class(dev, "mdio_bus");
- if (d != NULL) {
- struct mii_bus *bus;
-
- bus = to_mii_bus(d);
- put_device(d);
-
- return bus;
- }
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus);
-
struct net_device *dsa_dev_to_net_device(struct device *dev)
{
struct device *d;
@@ -511,456 +189,43 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
-#ifdef CONFIG_OF
-static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
- struct dsa_chip_data *cd,
- int chip_index, int port_index,
- struct device_node *link)
-{
- const __be32 *reg;
- int link_sw_addr;
- struct device_node *parent_sw;
- int len;
-
- parent_sw = of_get_parent(link);
- if (!parent_sw)
- return -EINVAL;
-
- reg = of_get_property(parent_sw, "reg", &len);
- if (!reg || (len != sizeof(*reg) * 2))
- return -EINVAL;
-
- /*
- * Get the destination switch number from the second field of its 'reg'
- * property, i.e. for "reg = <0x19 1>" sw_addr is '1'.
- */
- link_sw_addr = be32_to_cpup(reg + 1);
-
- if (link_sw_addr >= pd->nr_chips)
- return -EINVAL;
-
- cd->rtable[link_sw_addr] = port_index;
-
- return 0;
-}
-
-static int dsa_of_probe_links(struct dsa_platform_data *pd,
- struct dsa_chip_data *cd,
- int chip_index, int port_index,
- struct device_node *port,
- const char *port_name)
-{
- struct device_node *link;
- int link_index;
- int ret;
-
- for (link_index = 0;; link_index++) {
- link = of_parse_phandle(port, "link", link_index);
- if (!link)
- break;
-
- if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) {
- ret = dsa_of_setup_routing_table(pd, cd, chip_index,
- port_index, link);
- if (ret)
- return ret;
- }
- }
- return 0;
-}
-
-static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
-{
- int i;
- int port_index;
-
- for (i = 0; i < pd->nr_chips; i++) {
- port_index = 0;
- while (port_index < DSA_MAX_PORTS) {
- kfree(pd->chip[i].port_names[port_index]);
- port_index++;
- }
-
- /* Drop our reference to the MDIO bus device */
- if (pd->chip[i].host_dev)
- put_device(pd->chip[i].host_dev);
- }
- kfree(pd->chip);
-}
-
-static int dsa_of_probe(struct device *dev)
-{
- struct device_node *np = dev->of_node;
- struct device_node *child, *mdio, *ethernet, *port;
- struct mii_bus *mdio_bus, *mdio_bus_switch;
- struct net_device *ethernet_dev;
- struct dsa_platform_data *pd;
- struct dsa_chip_data *cd;
- const char *port_name;
- int chip_index, port_index;
- const unsigned int *sw_addr, *port_reg;
- u32 eeprom_len;
- int ret;
-
- mdio = of_parse_phandle(np, "dsa,mii-bus", 0);
- if (!mdio)
- return -EINVAL;
-
- mdio_bus = of_mdio_find_bus(mdio);
- if (!mdio_bus)
- return -EPROBE_DEFER;
-
- ethernet = of_parse_phandle(np, "dsa,ethernet", 0);
- if (!ethernet) {
- ret = -EINVAL;
- goto out_put_mdio;
- }
-
- ethernet_dev = of_find_net_device_by_node(ethernet);
- if (!ethernet_dev) {
- ret = -EPROBE_DEFER;
- goto out_put_mdio;
- }
-
- pd = kzalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd) {
- ret = -ENOMEM;
- goto out_put_ethernet;
- }
-
- dev->platform_data = pd;
- pd->of_netdev = ethernet_dev;
- pd->nr_chips = of_get_available_child_count(np);
- if (pd->nr_chips > DSA_MAX_SWITCHES)
- pd->nr_chips = DSA_MAX_SWITCHES;
-
- pd->chip = kcalloc(pd->nr_chips, sizeof(struct dsa_chip_data),
- GFP_KERNEL);
- if (!pd->chip) {
- ret = -ENOMEM;
- goto out_free;
- }
-
- chip_index = -1;
- for_each_available_child_of_node(np, child) {
- int i;
-
- chip_index++;
- cd = &pd->chip[chip_index];
-
- cd->of_node = child;
-
- /* Initialize the routing table */
- for (i = 0; i < DSA_MAX_SWITCHES; ++i)
- cd->rtable[i] = DSA_RTABLE_NONE;
-
- /* When assigning the host device, increment its refcount */
- cd->host_dev = get_device(&mdio_bus->dev);
-
- sw_addr = of_get_property(child, "reg", NULL);
- if (!sw_addr)
- continue;
-
- cd->sw_addr = be32_to_cpup(sw_addr);
- if (cd->sw_addr >= PHY_MAX_ADDR)
- continue;
-
- if (!of_property_read_u32(child, "eeprom-length", &eeprom_len))
- cd->eeprom_len = eeprom_len;
-
- mdio = of_parse_phandle(child, "mii-bus", 0);
- if (mdio) {
- mdio_bus_switch = of_mdio_find_bus(mdio);
- if (!mdio_bus_switch) {
- ret = -EPROBE_DEFER;
- goto out_free_chip;
- }
-
- /* Drop the mdio_bus device ref, replacing the host
- * device with the mdio_bus_switch device, keeping
- * the refcount from of_mdio_find_bus() above.
- */
- put_device(cd->host_dev);
- cd->host_dev = &mdio_bus_switch->dev;
- }
-
- for_each_available_child_of_node(child, port) {
- port_reg = of_get_property(port, "reg", NULL);
- if (!port_reg)
- continue;
-
- port_index = be32_to_cpup(port_reg);
- if (port_index >= DSA_MAX_PORTS)
- break;
-
- port_name = of_get_property(port, "label", NULL);
- if (!port_name)
- continue;
-
- cd->port_dn[port_index] = port;
-
- cd->port_names[port_index] = kstrdup(port_name,
- GFP_KERNEL);
- if (!cd->port_names[port_index]) {
- ret = -ENOMEM;
- goto out_free_chip;
- }
-
- ret = dsa_of_probe_links(pd, cd, chip_index,
- port_index, port, port_name);
- if (ret)
- goto out_free_chip;
-
- }
- }
-
- /* The individual chips hold their own refcount on the mdio bus,
- * so drop ours */
- put_device(&mdio_bus->dev);
-
- return 0;
-
-out_free_chip:
- dsa_of_free_platform_data(pd);
-out_free:
- kfree(pd);
- dev->platform_data = NULL;
-out_put_ethernet:
- put_device(&ethernet_dev->dev);
-out_put_mdio:
- put_device(&mdio_bus->dev);
- return ret;
-}
-
-static void dsa_of_remove(struct device *dev)
-{
- struct dsa_platform_data *pd = dev->platform_data;
-
- if (!dev->of_node)
- return;
-
- dsa_of_free_platform_data(pd);
- put_device(&pd->of_netdev->dev);
- kfree(pd);
-}
-#else
-static inline int dsa_of_probe(struct device *dev)
-{
- return 0;
-}
-
-static inline void dsa_of_remove(struct device *dev)
-{
-}
-#endif
-
-static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
- struct device *parent, struct dsa_platform_data *pd)
-{
- int i;
- unsigned configured = 0;
-
- dst->pd = pd;
- dst->master_netdev = dev;
- dst->cpu_port = -1;
-
- for (i = 0; i < pd->nr_chips; i++) {
- struct dsa_switch *ds;
-
- ds = dsa_switch_setup(dst, i, parent, pd->chip[i].host_dev);
- if (IS_ERR(ds)) {
- netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n",
- i, PTR_ERR(ds));
- continue;
- }
-
- dst->ds[i] = ds;
-
- ++configured;
- }
-
- /*
- * If no switch was found, exit cleanly
- */
- if (!configured)
- return -EPROBE_DEFER;
-
- /*
- * If we use a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point on get
- * sent to the tag format's receive function.
- */
- wmb();
- dev->dsa_ptr = (void *)dst;
-
- return 0;
-}
-
-static int dsa_probe(struct platform_device *pdev)
-{
- struct dsa_platform_data *pd = pdev->dev.platform_data;
- struct net_device *dev;
- struct dsa_switch_tree *dst;
- int ret;
-
- if (pdev->dev.of_node) {
- ret = dsa_of_probe(&pdev->dev);
- if (ret)
- return ret;
-
- pd = pdev->dev.platform_data;
- }
-
- if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL))
- return -EINVAL;
-
- if (pd->of_netdev) {
- dev = pd->of_netdev;
- dev_hold(dev);
- } else {
- dev = dsa_dev_to_net_device(pd->netdev);
- }
- if (dev == NULL) {
- ret = -EPROBE_DEFER;
- goto out;
- }
-
- if (dev->dsa_ptr != NULL) {
- dev_put(dev);
- ret = -EEXIST;
- goto out;
- }
-
- dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL);
- if (dst == NULL) {
- dev_put(dev);
- ret = -ENOMEM;
- goto out;
- }
-
- platform_set_drvdata(pdev, dst);
-
- ret = dsa_setup_dst(dst, dev, &pdev->dev, pd);
- if (ret) {
- dev_put(dev);
- goto out;
- }
-
- return 0;
-
-out:
- dsa_of_remove(&pdev->dev);
-
- return ret;
-}
-
-static void dsa_remove_dst(struct dsa_switch_tree *dst)
-{
- int i;
-
- dst->master_netdev->dsa_ptr = NULL;
-
- /* If we used a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point get sent
- * without the tag and go through the regular receive path.
- */
- wmb();
-
- for (i = 0; i < dst->pd->nr_chips; i++) {
- struct dsa_switch *ds = dst->ds[i];
-
- if (ds)
- dsa_switch_destroy(ds);
- }
-
- dsa_cpu_port_ethtool_restore(dst->cpu_switch);
-
- dev_put(dst->master_netdev);
-}
-
-static int dsa_remove(struct platform_device *pdev)
-{
- struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
-
- dsa_remove_dst(dst);
- dsa_of_remove(&pdev->dev);
-
- return 0;
-}
-
-static void dsa_shutdown(struct platform_device *pdev)
-{
-}
-
static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct sk_buff *nskb = NULL;
if (unlikely(dst == NULL)) {
kfree_skb(skb);
return 0;
}
- return dst->rcv(skb, dev, pt, orig_dev);
-}
-
-static struct packet_type dsa_pack_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_XDSA),
- .func = dsa_switch_rcv,
-};
-
-#ifdef CONFIG_PM_SLEEP
-static int dsa_suspend(struct device *d)
-{
- struct platform_device *pdev = to_platform_device(d);
- struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
- int i, ret = 0;
-
- for (i = 0; i < dst->pd->nr_chips; i++) {
- struct dsa_switch *ds = dst->ds[i];
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb)
+ return 0;
- if (ds != NULL)
- ret = dsa_switch_suspend(ds);
+ nskb = dst->rcv(skb, dev, pt, orig_dev);
+ if (!nskb) {
+ kfree_skb(skb);
+ return 0;
}
- return ret;
-}
-
-static int dsa_resume(struct device *d)
-{
- struct platform_device *pdev = to_platform_device(d);
- struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
- int i, ret = 0;
+ skb = nskb;
+ skb_push(skb, ETH_HLEN);
+ skb->pkt_type = PACKET_HOST;
+ skb->protocol = eth_type_trans(skb, skb->dev);
- for (i = 0; i < dst->pd->nr_chips; i++) {
- struct dsa_switch *ds = dst->ds[i];
+ skb->dev->stats.rx_packets++;
+ skb->dev->stats.rx_bytes += skb->len;
- if (ds != NULL)
- ret = dsa_switch_resume(ds);
- }
+ netif_receive_skb(skb);
- return ret;
+ return 0;
}
-#endif
-
-static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
-static const struct of_device_id dsa_of_match_table[] = {
- { .compatible = "marvell,dsa", },
- {}
-};
-MODULE_DEVICE_TABLE(of, dsa_of_match_table);
-
-static struct platform_driver dsa_driver = {
- .probe = dsa_probe,
- .remove = dsa_remove,
- .shutdown = dsa_shutdown,
- .driver = {
- .name = "dsa",
- .of_match_table = dsa_of_match_table,
- .pm = &dsa_pm_ops,
- },
+static struct packet_type dsa_pack_type __read_mostly = {
+ .type = cpu_to_be16(ETH_P_XDSA),
+ .func = dsa_switch_rcv,
};
static int __init dsa_init_module(void)
@@ -971,7 +236,7 @@ static int __init dsa_init_module(void)
if (rc)
return rc;
- rc = platform_driver_register(&dsa_driver);
+ rc = dsa_legacy_register();
if (rc)
return rc;
@@ -985,7 +250,7 @@ static void __exit dsa_cleanup_module(void)
{
dsa_slave_unregister_notifier();
dev_remove_pack(&dsa_pack_type);
- platform_driver_unregister(&dsa_driver);
+ dsa_legacy_unregister();
}
module_exit(dsa_cleanup_module);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 737be6470c7f..033b3bfb63dc 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -13,16 +13,20 @@
#include <linux/device.h>
#include <linux/err.h>
#include <linux/list.h>
+#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/rtnetlink.h>
-#include <net/dsa.h>
#include <linux/of.h>
#include <linux/of_net.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
static LIST_HEAD(dsa_switch_trees);
static DEFINE_MUTEX(dsa2_mutex);
+static const struct devlink_ops dsa_devlink_ops = {
+};
+
static struct dsa_switch_tree *dsa_get_dst(u32 tree)
{
struct dsa_switch_tree *dst;
@@ -222,12 +226,18 @@ static int dsa_dsa_port_apply(struct dsa_port *port, u32 index,
return err;
}
- return 0;
+ memset(&ds->ports[index].devlink_port, 0,
+ sizeof(ds->ports[index].devlink_port));
+
+ return devlink_port_register(ds->devlink,
+ &ds->ports[index].devlink_port,
+ index);
}
static void dsa_dsa_port_unapply(struct dsa_port *port, u32 index,
struct dsa_switch *ds)
{
+ devlink_port_unregister(&ds->ports[index].devlink_port);
dsa_cpu_dsa_destroy(port);
}
@@ -245,12 +255,17 @@ static int dsa_cpu_port_apply(struct dsa_port *port, u32 index,
ds->cpu_port_mask |= BIT(index);
- return 0;
+ memset(&ds->ports[index].devlink_port, 0,
+ sizeof(ds->ports[index].devlink_port));
+ err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port,
+ index);
+ return err;
}
static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index,
struct dsa_switch *ds)
{
+ devlink_port_unregister(&ds->ports[index].devlink_port);
dsa_cpu_dsa_destroy(port);
ds->cpu_port_mask &= ~BIT(index);
@@ -275,12 +290,23 @@ static int dsa_user_port_apply(struct dsa_port *port, u32 index,
return err;
}
+ memset(&ds->ports[index].devlink_port, 0,
+ sizeof(ds->ports[index].devlink_port));
+ err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port,
+ index);
+ if (err)
+ return err;
+
+ devlink_port_type_eth_set(&ds->ports[index].devlink_port,
+ ds->ports[index].netdev);
+
return 0;
}
static void dsa_user_port_unapply(struct dsa_port *port, u32 index,
struct dsa_switch *ds)
{
+ devlink_port_unregister(&ds->ports[index].devlink_port);
if (ds->ports[index].netdev) {
dsa_slave_destroy(ds->ports[index].netdev);
ds->ports[index].netdev = NULL;
@@ -301,6 +327,17 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
*/
ds->phys_mii_mask = ds->enabled_port_mask;
+ /* Add the switch to devlink before calling setup, so that setup can
+ * add dpipe tables
+ */
+ ds->devlink = devlink_alloc(&dsa_devlink_ops, 0);
+ if (!ds->devlink)
+ return -ENOMEM;
+
+ err = devlink_register(ds->devlink, ds->dev);
+ if (err)
+ return err;
+
err = ds->ops->setup(ds);
if (err < 0)
return err;
@@ -381,6 +418,13 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
mdiobus_unregister(ds->slave_mii_bus);
dsa_switch_unregister_notifier(ds);
+
+ if (ds->devlink) {
+ devlink_unregister(ds->devlink);
+ devlink_free(ds->devlink);
+ ds->devlink = NULL;
+ }
+
}
static int dsa_dst_apply(struct dsa_switch_tree *dst)
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 0706a511244e..f4a88e485213 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -17,8 +17,9 @@
struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
- int (*rcv)(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev);
+ struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev);
};
struct dsa_slave_priv {
@@ -54,6 +55,10 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds);
void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds);
+/* legacy.c */
+int dsa_legacy_register(void);
+void dsa_legacy_unregister(void);
+
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
@@ -85,4 +90,10 @@ extern const struct dsa_device_ops brcm_netdev_ops;
/* tag_qca.c */
extern const struct dsa_device_ops qca_netdev_ops;
+/* tag_mtk.c */
+extern const struct dsa_device_ops mtk_netdev_ops;
+
+/* tag_lan9303.c */
+extern const struct dsa_device_ops lan9303_netdev_ops;
+
#endif
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
new file mode 100644
index 000000000000..ad345c8b0b06
--- /dev/null
+++ b/net/dsa/legacy.c
@@ -0,0 +1,818 @@
+/*
+ * net/dsa/legacy.c - Hardware switch handling
+ * Copyright (c) 2008-2009 Marvell Semiconductor
+ * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_platform.h>
+#include <linux/of_net.h>
+#include <linux/netdevice.h>
+#include <linux/sysfs.h>
+#include <linux/phy_fixed.h>
+#include <linux/etherdevice.h>
+#include <net/dsa.h>
+#include "dsa_priv.h"
+
+/* switch driver registration ***********************************************/
+static DEFINE_MUTEX(dsa_switch_drivers_mutex);
+static LIST_HEAD(dsa_switch_drivers);
+
+void register_switch_driver(struct dsa_switch_driver *drv)
+{
+ mutex_lock(&dsa_switch_drivers_mutex);
+ list_add_tail(&drv->list, &dsa_switch_drivers);
+ mutex_unlock(&dsa_switch_drivers_mutex);
+}
+EXPORT_SYMBOL_GPL(register_switch_driver);
+
+void unregister_switch_driver(struct dsa_switch_driver *drv)
+{
+ mutex_lock(&dsa_switch_drivers_mutex);
+ list_del_init(&drv->list);
+ mutex_unlock(&dsa_switch_drivers_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_switch_driver);
+
+static const struct dsa_switch_ops *
+dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
+ const char **_name, void **priv)
+{
+ const struct dsa_switch_ops *ret;
+ struct list_head *list;
+ const char *name;
+
+ ret = NULL;
+ name = NULL;
+
+ mutex_lock(&dsa_switch_drivers_mutex);
+ list_for_each(list, &dsa_switch_drivers) {
+ const struct dsa_switch_ops *ops;
+ struct dsa_switch_driver *drv;
+
+ drv = list_entry(list, struct dsa_switch_driver, list);
+ ops = drv->ops;
+
+ name = ops->probe(parent, host_dev, sw_addr, priv);
+ if (name != NULL) {
+ ret = ops;
+ break;
+ }
+ }
+ mutex_unlock(&dsa_switch_drivers_mutex);
+
+ *_name = name;
+
+ return ret;
+}
+
+/* basic switch operations **************************************************/
+static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev)
+{
+ struct dsa_port *dport;
+ int ret, port;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
+ continue;
+
+ dport = &ds->ports[port];
+ ret = dsa_cpu_dsa_setup(ds, dev, dport, port);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
+{
+ const struct dsa_switch_ops *ops = ds->ops;
+ struct dsa_switch_tree *dst = ds->dst;
+ struct dsa_chip_data *cd = ds->cd;
+ bool valid_name_found = false;
+ int index = ds->index;
+ int i, ret;
+
+ /*
+ * Validate supplied switch configuration.
+ */
+ for (i = 0; i < ds->num_ports; i++) {
+ char *name;
+
+ name = cd->port_names[i];
+ if (name == NULL)
+ continue;
+
+ if (!strcmp(name, "cpu")) {
+ if (dst->cpu_switch) {
+ netdev_err(dst->master_netdev,
+ "multiple cpu ports?!\n");
+ return -EINVAL;
+ }
+ dst->cpu_switch = ds;
+ dst->cpu_port = i;
+ ds->cpu_port_mask |= 1 << i;
+ } else if (!strcmp(name, "dsa")) {
+ ds->dsa_port_mask |= 1 << i;
+ } else {
+ ds->enabled_port_mask |= 1 << i;
+ }
+ valid_name_found = true;
+ }
+
+ if (!valid_name_found && i == ds->num_ports)
+ return -EINVAL;
+
+ /* Make the built-in MII bus mask match the number of ports,
+ * switch drivers can override this later
+ */
+ ds->phys_mii_mask = ds->enabled_port_mask;
+
+ /*
+ * If the CPU connects to this switch, set the switch tree
+ * tagging protocol to the preferred tagging format of this
+ * switch.
+ */
+ if (dst->cpu_switch == ds) {
+ enum dsa_tag_protocol tag_protocol;
+
+ tag_protocol = ops->get_tag_protocol(ds);
+ dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
+ if (IS_ERR(dst->tag_ops))
+ return PTR_ERR(dst->tag_ops);
+
+ dst->rcv = dst->tag_ops->rcv;
+ }
+
+ memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
+
+ /*
+ * Do basic register setup.
+ */
+ ret = ops->setup(ds);
+ if (ret < 0)
+ return ret;
+
+ ret = dsa_switch_register_notifier(ds);
+ if (ret)
+ return ret;
+
+ if (ops->set_addr) {
+ ret = ops->set_addr(ds, dst->master_netdev->dev_addr);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (!ds->slave_mii_bus && ops->phy_read) {
+ ds->slave_mii_bus = devm_mdiobus_alloc(parent);
+ if (!ds->slave_mii_bus)
+ return -ENOMEM;
+ dsa_slave_mii_bus_init(ds);
+
+ ret = mdiobus_register(ds->slave_mii_bus);
+ if (ret < 0)
+ return ret;
+ }
+
+ /*
+ * Create network devices for physical switch ports.
+ */
+ for (i = 0; i < ds->num_ports; i++) {
+ ds->ports[i].dn = cd->port_dn[i];
+
+ if (!(ds->enabled_port_mask & (1 << i)))
+ continue;
+
+ ret = dsa_slave_create(ds, parent, i, cd->port_names[i]);
+ if (ret < 0)
+ netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
+ index, i, cd->port_names[i], ret);
+ }
+
+ /* Perform configuration of the CPU and DSA ports */
+ ret = dsa_cpu_dsa_setups(ds, parent);
+ if (ret < 0)
+ netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n",
+ index);
+
+ ret = dsa_cpu_port_ethtool_setup(ds);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static struct dsa_switch *
+dsa_switch_setup(struct dsa_switch_tree *dst, int index,
+ struct device *parent, struct device *host_dev)
+{
+ struct dsa_chip_data *cd = dst->pd->chip + index;
+ const struct dsa_switch_ops *ops;
+ struct dsa_switch *ds;
+ int ret;
+ const char *name;
+ void *priv;
+
+ /*
+ * Probe for switch model.
+ */
+ ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
+ if (!ops) {
+ netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
+ index);
+ return ERR_PTR(-EINVAL);
+ }
+ netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n",
+ index, name);
+
+
+ /*
+ * Allocate and initialise switch state.
+ */
+ ds = dsa_switch_alloc(parent, DSA_MAX_PORTS);
+ if (!ds)
+ return ERR_PTR(-ENOMEM);
+
+ ds->dst = dst;
+ ds->index = index;
+ ds->cd = cd;
+ ds->ops = ops;
+ ds->priv = priv;
+
+ ret = dsa_switch_setup_one(ds, parent);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return ds;
+}
+
+static void dsa_switch_destroy(struct dsa_switch *ds)
+{
+ int port;
+
+ /* Destroy network devices for physical switch ports. */
+ for (port = 0; port < ds->num_ports; port++) {
+ if (!(ds->enabled_port_mask & (1 << port)))
+ continue;
+
+ if (!ds->ports[port].netdev)
+ continue;
+
+ dsa_slave_destroy(ds->ports[port].netdev);
+ }
+
+ /* Disable configuration of the CPU and DSA ports */
+ for (port = 0; port < ds->num_ports; port++) {
+ if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
+ continue;
+ dsa_cpu_dsa_destroy(&ds->ports[port]);
+
+ /* Clearing a bit which is not set does no harm */
+ ds->cpu_port_mask |= ~(1 << port);
+ ds->dsa_port_mask |= ~(1 << port);
+ }
+
+ if (ds->slave_mii_bus && ds->ops->phy_read)
+ mdiobus_unregister(ds->slave_mii_bus);
+
+ dsa_switch_unregister_notifier(ds);
+}
+
+#ifdef CONFIG_PM_SLEEP
+int dsa_switch_suspend(struct dsa_switch *ds)
+{
+ int i, ret = 0;
+
+ /* Suspend slave network devices */
+ for (i = 0; i < ds->num_ports; i++) {
+ if (!dsa_is_port_initialized(ds, i))
+ continue;
+
+ ret = dsa_slave_suspend(ds->ports[i].netdev);
+ if (ret)
+ return ret;
+ }
+
+ if (ds->ops->suspend)
+ ret = ds->ops->suspend(ds);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_suspend);
+
+int dsa_switch_resume(struct dsa_switch *ds)
+{
+ int i, ret = 0;
+
+ if (ds->ops->resume)
+ ret = ds->ops->resume(ds);
+
+ if (ret)
+ return ret;
+
+ /* Resume slave network devices */
+ for (i = 0; i < ds->num_ports; i++) {
+ if (!dsa_is_port_initialized(ds, i))
+ continue;
+
+ ret = dsa_slave_resume(ds->ports[i].netdev);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_resume);
+#endif
+
+/* platform driver init and cleanup *****************************************/
+static int dev_is_class(struct device *dev, void *class)
+{
+ if (dev->class != NULL && !strcmp(dev->class->name, class))
+ return 1;
+
+ return 0;
+}
+
+static struct device *dev_find_class(struct device *parent, char *class)
+{
+ if (dev_is_class(parent, class)) {
+ get_device(parent);
+ return parent;
+ }
+
+ return device_find_child(parent, class, dev_is_class);
+}
+
+struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev)
+{
+ struct device *d;
+
+ d = dev_find_class(dev, "mdio_bus");
+ if (d != NULL) {
+ struct mii_bus *bus;
+
+ bus = to_mii_bus(d);
+ put_device(d);
+
+ return bus;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus);
+
+#ifdef CONFIG_OF
+static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
+ struct dsa_chip_data *cd,
+ int chip_index, int port_index,
+ struct device_node *link)
+{
+ const __be32 *reg;
+ int link_sw_addr;
+ struct device_node *parent_sw;
+ int len;
+
+ parent_sw = of_get_parent(link);
+ if (!parent_sw)
+ return -EINVAL;
+
+ reg = of_get_property(parent_sw, "reg", &len);
+ if (!reg || (len != sizeof(*reg) * 2))
+ return -EINVAL;
+
+ /*
+ * Get the destination switch number from the second field of its 'reg'
+ * property, i.e. for "reg = <0x19 1>" sw_addr is '1'.
+ */
+ link_sw_addr = be32_to_cpup(reg + 1);
+
+ if (link_sw_addr >= pd->nr_chips)
+ return -EINVAL;
+
+ cd->rtable[link_sw_addr] = port_index;
+
+ return 0;
+}
+
+static int dsa_of_probe_links(struct dsa_platform_data *pd,
+ struct dsa_chip_data *cd,
+ int chip_index, int port_index,
+ struct device_node *port,
+ const char *port_name)
+{
+ struct device_node *link;
+ int link_index;
+ int ret;
+
+ for (link_index = 0;; link_index++) {
+ link = of_parse_phandle(port, "link", link_index);
+ if (!link)
+ break;
+
+ if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) {
+ ret = dsa_of_setup_routing_table(pd, cd, chip_index,
+ port_index, link);
+ if (ret)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
+{
+ int i;
+ int port_index;
+
+ for (i = 0; i < pd->nr_chips; i++) {
+ port_index = 0;
+ while (port_index < DSA_MAX_PORTS) {
+ kfree(pd->chip[i].port_names[port_index]);
+ port_index++;
+ }
+
+ /* Drop our reference to the MDIO bus device */
+ if (pd->chip[i].host_dev)
+ put_device(pd->chip[i].host_dev);
+ }
+ kfree(pd->chip);
+}
+
+static int dsa_of_probe(struct device *dev)
+{
+ struct device_node *np = dev->of_node;
+ struct device_node *child, *mdio, *ethernet, *port;
+ struct mii_bus *mdio_bus, *mdio_bus_switch;
+ struct net_device *ethernet_dev;
+ struct dsa_platform_data *pd;
+ struct dsa_chip_data *cd;
+ const char *port_name;
+ int chip_index, port_index;
+ const unsigned int *sw_addr, *port_reg;
+ u32 eeprom_len;
+ int ret;
+
+ mdio = of_parse_phandle(np, "dsa,mii-bus", 0);
+ if (!mdio)
+ return -EINVAL;
+
+ mdio_bus = of_mdio_find_bus(mdio);
+ if (!mdio_bus)
+ return -EPROBE_DEFER;
+
+ ethernet = of_parse_phandle(np, "dsa,ethernet", 0);
+ if (!ethernet) {
+ ret = -EINVAL;
+ goto out_put_mdio;
+ }
+
+ ethernet_dev = of_find_net_device_by_node(ethernet);
+ if (!ethernet_dev) {
+ ret = -EPROBE_DEFER;
+ goto out_put_mdio;
+ }
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd) {
+ ret = -ENOMEM;
+ goto out_put_ethernet;
+ }
+
+ dev->platform_data = pd;
+ pd->of_netdev = ethernet_dev;
+ pd->nr_chips = of_get_available_child_count(np);
+ if (pd->nr_chips > DSA_MAX_SWITCHES)
+ pd->nr_chips = DSA_MAX_SWITCHES;
+
+ pd->chip = kcalloc(pd->nr_chips, sizeof(struct dsa_chip_data),
+ GFP_KERNEL);
+ if (!pd->chip) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ chip_index = -1;
+ for_each_available_child_of_node(np, child) {
+ int i;
+
+ chip_index++;
+ cd = &pd->chip[chip_index];
+
+ cd->of_node = child;
+
+ /* Initialize the routing table */
+ for (i = 0; i < DSA_MAX_SWITCHES; ++i)
+ cd->rtable[i] = DSA_RTABLE_NONE;
+
+ /* When assigning the host device, increment its refcount */
+ cd->host_dev = get_device(&mdio_bus->dev);
+
+ sw_addr = of_get_property(child, "reg", NULL);
+ if (!sw_addr)
+ continue;
+
+ cd->sw_addr = be32_to_cpup(sw_addr);
+ if (cd->sw_addr >= PHY_MAX_ADDR)
+ continue;
+
+ if (!of_property_read_u32(child, "eeprom-length", &eeprom_len))
+ cd->eeprom_len = eeprom_len;
+
+ mdio = of_parse_phandle(child, "mii-bus", 0);
+ if (mdio) {
+ mdio_bus_switch = of_mdio_find_bus(mdio);
+ if (!mdio_bus_switch) {
+ ret = -EPROBE_DEFER;
+ goto out_free_chip;
+ }
+
+ /* Drop the mdio_bus device ref, replacing the host
+ * device with the mdio_bus_switch device, keeping
+ * the refcount from of_mdio_find_bus() above.
+ */
+ put_device(cd->host_dev);
+ cd->host_dev = &mdio_bus_switch->dev;
+ }
+
+ for_each_available_child_of_node(child, port) {
+ port_reg = of_get_property(port, "reg", NULL);
+ if (!port_reg)
+ continue;
+
+ port_index = be32_to_cpup(port_reg);
+ if (port_index >= DSA_MAX_PORTS)
+ break;
+
+ port_name = of_get_property(port, "label", NULL);
+ if (!port_name)
+ continue;
+
+ cd->port_dn[port_index] = port;
+
+ cd->port_names[port_index] = kstrdup(port_name,
+ GFP_KERNEL);
+ if (!cd->port_names[port_index]) {
+ ret = -ENOMEM;
+ goto out_free_chip;
+ }
+
+ ret = dsa_of_probe_links(pd, cd, chip_index,
+ port_index, port, port_name);
+ if (ret)
+ goto out_free_chip;
+
+ }
+ }
+
+ /* The individual chips hold their own refcount on the mdio bus,
+ * so drop ours */
+ put_device(&mdio_bus->dev);
+
+ return 0;
+
+out_free_chip:
+ dsa_of_free_platform_data(pd);
+out_free:
+ kfree(pd);
+ dev->platform_data = NULL;
+out_put_ethernet:
+ put_device(&ethernet_dev->dev);
+out_put_mdio:
+ put_device(&mdio_bus->dev);
+ return ret;
+}
+
+static void dsa_of_remove(struct device *dev)
+{
+ struct dsa_platform_data *pd = dev->platform_data;
+
+ if (!dev->of_node)
+ return;
+
+ dsa_of_free_platform_data(pd);
+ put_device(&pd->of_netdev->dev);
+ kfree(pd);
+}
+#else
+static inline int dsa_of_probe(struct device *dev)
+{
+ return 0;
+}
+
+static inline void dsa_of_remove(struct device *dev)
+{
+}
+#endif
+
+static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
+ struct device *parent, struct dsa_platform_data *pd)
+{
+ int i;
+ unsigned configured = 0;
+
+ dst->pd = pd;
+ dst->master_netdev = dev;
+ dst->cpu_port = -1;
+
+ for (i = 0; i < pd->nr_chips; i++) {
+ struct dsa_switch *ds;
+
+ ds = dsa_switch_setup(dst, i, parent, pd->chip[i].host_dev);
+ if (IS_ERR(ds)) {
+ netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n",
+ i, PTR_ERR(ds));
+ continue;
+ }
+
+ dst->ds[i] = ds;
+
+ ++configured;
+ }
+
+ /*
+ * If no switch was found, exit cleanly
+ */
+ if (!configured)
+ return -EPROBE_DEFER;
+
+ /*
+ * If we use a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point on get
+ * sent to the tag format's receive function.
+ */
+ wmb();
+ dev->dsa_ptr = (void *)dst;
+
+ return 0;
+}
+
+static int dsa_probe(struct platform_device *pdev)
+{
+ struct dsa_platform_data *pd = pdev->dev.platform_data;
+ struct net_device *dev;
+ struct dsa_switch_tree *dst;
+ int ret;
+
+ if (pdev->dev.of_node) {
+ ret = dsa_of_probe(&pdev->dev);
+ if (ret)
+ return ret;
+
+ pd = pdev->dev.platform_data;
+ }
+
+ if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL))
+ return -EINVAL;
+
+ if (pd->of_netdev) {
+ dev = pd->of_netdev;
+ dev_hold(dev);
+ } else {
+ dev = dsa_dev_to_net_device(pd->netdev);
+ }
+ if (dev == NULL) {
+ ret = -EPROBE_DEFER;
+ goto out;
+ }
+
+ if (dev->dsa_ptr != NULL) {
+ dev_put(dev);
+ ret = -EEXIST;
+ goto out;
+ }
+
+ dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL);
+ if (dst == NULL) {
+ dev_put(dev);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ platform_set_drvdata(pdev, dst);
+
+ ret = dsa_setup_dst(dst, dev, &pdev->dev, pd);
+ if (ret) {
+ dev_put(dev);
+ goto out;
+ }
+
+ return 0;
+
+out:
+ dsa_of_remove(&pdev->dev);
+
+ return ret;
+}
+
+static void dsa_remove_dst(struct dsa_switch_tree *dst)
+{
+ int i;
+
+ dst->master_netdev->dsa_ptr = NULL;
+
+ /* If we used a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point get sent
+ * without the tag and go through the regular receive path.
+ */
+ wmb();
+
+ for (i = 0; i < dst->pd->nr_chips; i++) {
+ struct dsa_switch *ds = dst->ds[i];
+
+ if (ds)
+ dsa_switch_destroy(ds);
+ }
+
+ dsa_cpu_port_ethtool_restore(dst->cpu_switch);
+
+ dev_put(dst->master_netdev);
+}
+
+static int dsa_remove(struct platform_device *pdev)
+{
+ struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
+
+ dsa_remove_dst(dst);
+ dsa_of_remove(&pdev->dev);
+
+ return 0;
+}
+
+static void dsa_shutdown(struct platform_device *pdev)
+{
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int dsa_suspend(struct device *d)
+{
+ struct platform_device *pdev = to_platform_device(d);
+ struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
+ int i, ret = 0;
+
+ for (i = 0; i < dst->pd->nr_chips; i++) {
+ struct dsa_switch *ds = dst->ds[i];
+
+ if (ds != NULL)
+ ret = dsa_switch_suspend(ds);
+ }
+
+ return ret;
+}
+
+static int dsa_resume(struct device *d)
+{
+ struct platform_device *pdev = to_platform_device(d);
+ struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
+ int i, ret = 0;
+
+ for (i = 0; i < dst->pd->nr_chips; i++) {
+ struct dsa_switch *ds = dst->ds[i];
+
+ if (ds != NULL)
+ ret = dsa_switch_resume(ds);
+ }
+
+ return ret;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
+
+static const struct of_device_id dsa_of_match_table[] = {
+ { .compatible = "marvell,dsa", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, dsa_of_match_table);
+
+static struct platform_driver dsa_driver = {
+ .probe = dsa_probe,
+ .remove = dsa_remove,
+ .shutdown = dsa_shutdown,
+ .driver = {
+ .name = "dsa",
+ .of_match_table = dsa_of_match_table,
+ .pm = &dsa_pm_ops,
+ },
+};
+
+int dsa_legacy_register(void)
+{
+ return platform_driver_register(&dsa_driver);
+}
+
+void dsa_legacy_unregister(void)
+{
+ platform_driver_unregister(&dsa_driver);
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index c34872e1febc..7693182df81e 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -17,6 +17,7 @@
#include <linux/of_mdio.h>
#include <linux/mdio.h>
#include <linux/list.h>
+#include <net/dsa.h>
#include <net/rtnetlink.h>
#include <net/switchdev.h>
#include <net/pkt_cls.h>
@@ -419,8 +420,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev,
return 0;
}
-static int dsa_fastest_ageing_time(struct dsa_switch *ds,
- unsigned int ageing_time)
+static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds,
+ unsigned int ageing_time)
{
int i;
@@ -443,9 +444,13 @@ static int dsa_slave_ageing_time(struct net_device *dev,
unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time);
unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
- /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
- if (switchdev_trans_ph_prepare(trans))
+ if (switchdev_trans_ph_prepare(trans)) {
+ if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
+ return -ERANGE;
+ if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
+ return -ERANGE;
return 0;
+ }
/* Keep the fastest ageing time in case of multiple bridges */
p->dp->ageing_time = ageing_time;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 6456dacf9ae9..ca6e26e514f0 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -1,7 +1,8 @@
/*
* Handling of a single switch chip, part of a switch fabric
*
- * Copyright (c) 2017 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ * Copyright (c) 2017 Savoir-faire Linux Inc.
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,9 +20,9 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
if (ds->index == info->sw_index && ds->ops->port_bridge_join)
return ds->ops->port_bridge_join(ds, info->port, info->br);
- if (ds->index != info->sw_index)
- dev_dbg(ds->dev, "crosschip DSA port %d.%d bridged to %s\n",
- info->sw_index, info->port, netdev_name(info->br));
+ if (ds->index != info->sw_index && ds->ops->crosschip_bridge_join)
+ return ds->ops->crosschip_bridge_join(ds, info->sw_index,
+ info->port, info->br);
return 0;
}
@@ -32,9 +33,9 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
if (ds->index == info->sw_index && ds->ops->port_bridge_leave)
ds->ops->port_bridge_leave(ds, info->port, info->br);
- if (ds->index != info->sw_index)
- dev_dbg(ds->dev, "crosschip DSA port %d.%d unbridged from %s\n",
- info->sw_index, info->port, netdev_name(info->br));
+ if (ds->index != info->sw_index && ds->ops->crosschip_bridge_leave)
+ ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port,
+ info->br);
return 0;
}
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 5d925b6b2bb1..2a9b52c5af86 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -12,6 +12,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
/* This tag length is 4 bytes, older ones were 6 bytes, we do not
@@ -91,23 +92,17 @@ out_free:
return NULL;
}
-static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
int source_port;
u8 *brcm_tag;
- if (unlikely(dst == NULL))
- goto out_drop;
-
ds = dst->cpu_switch;
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out;
-
if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
goto out_drop;
@@ -139,22 +134,12 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
skb->data - ETH_HLEN - BRCM_TAG_LEN,
2 * ETH_ALEN);
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
skb->dev = ds->ports[source_port].netdev;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops brcm_netdev_ops = {
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 72579ceea381..1c6633f0de01 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -11,6 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
#define DSA_HLEN 4
@@ -67,8 +68,9 @@ out_free:
return NULL;
}
-static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
@@ -76,13 +78,6 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
int source_device;
int source_port;
- if (unlikely(dst == NULL))
- goto out_drop;
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out;
-
if (unlikely(!pskb_may_pull(skb, DSA_HLEN)))
goto out_drop;
@@ -164,21 +159,11 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
}
skb->dev = ds->ports[source_port].netdev;
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops dsa_netdev_ops = {
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 648c051817a1..d9c668aa5e54 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -11,6 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
#define DSA_HLEN 4
@@ -80,8 +81,9 @@ out_free:
return NULL;
}
-static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
@@ -89,13 +91,6 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
int source_device;
int source_port;
- if (unlikely(dst == NULL))
- goto out_drop;
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out;
-
if (unlikely(!pskb_may_pull(skb, EDSA_HLEN)))
goto out_drop;
@@ -183,21 +178,11 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
}
skb->dev = ds->ports[source_port].netdev;
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops edsa_netdev_ops = {
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
new file mode 100644
index 000000000000..70130ed5c21a
--- /dev/null
+++ b/net/dsa/tag_lan9303.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2017 Pengutronix, Juergen Borleis <jbe@pengutronix.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/etherdevice.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <net/dsa.h>
+#include "dsa_priv.h"
+
+/* To define the outgoing port and to discover the incoming port a regular
+ * VLAN tag is used by the LAN9303. But its VID meaning is 'special':
+ *
+ * Dest MAC Src MAC TAG Type
+ * ...| 1 2 3 4 5 6 | 1 2 3 4 5 6 | 1 2 3 4 | 1 2 |...
+ * |<------->|
+ * TAG:
+ * |<------------->|
+ * | 1 2 | 3 4 |
+ * TPID VID
+ * 0x8100
+ *
+ * VID bit 3 indicates a request for an ALR lookup.
+ *
+ * If VID bit 3 is zero, then bits 0 and 1 specify the destination port
+ * (0, 1, 2) or broadcast (3) or the source port (1, 2).
+ *
+ * VID bit 4 is used to specify if the STP port state should be overridden.
+ * Required when no forwarding between the external ports should happen.
+ */
+
+#define LAN9303_TAG_LEN 4
+#define LAN9303_MAX_PORTS 3
+
+static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ u16 *lan9303_tag;
+
+ /* insert a special VLAN tag between the MAC addresses
+ * and the current ethertype field.
+ */
+ if (skb_cow_head(skb, LAN9303_TAG_LEN) < 0) {
+ dev_dbg(&dev->dev,
+ "Cannot make room for the special tag. Dropping packet\n");
+ goto out_free;
+ }
+
+ /* provide 'LAN9303_TAG_LEN' bytes additional space */
+ skb_push(skb, LAN9303_TAG_LEN);
+
+ /* make room between MACs and Ether-Type */
+ memmove(skb->data, skb->data + LAN9303_TAG_LEN, 2 * ETH_ALEN);
+
+ lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN);
+ lan9303_tag[0] = htons(ETH_P_8021Q);
+ lan9303_tag[1] = htons(p->dp->index | BIT(4));
+
+ return skb;
+out_free:
+ kfree_skb(skb);
+ return NULL;
+}
+
+static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ u16 *lan9303_tag;
+ struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_switch *ds;
+ unsigned int source_port;
+
+ ds = dst->ds[0];
+
+ if (unlikely(!ds)) {
+ dev_warn_ratelimited(&dev->dev, "Dropping packet, due to missing DSA switch device\n");
+ return NULL;
+ }
+
+ if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) {
+ dev_warn_ratelimited(&dev->dev,
+ "Dropping packet, cannot pull\n");
+ return NULL;
+ }
+
+ /* '->data' points into the middle of our special VLAN tag information:
+ *
+ * ~ MAC src | 0x81 | 0x00 | 0xyy | 0xzz | ether type
+ * ^
+ * ->data
+ */
+ lan9303_tag = (u16 *)(skb->data - 2);
+
+ if (lan9303_tag[0] != htons(ETH_P_8021Q)) {
+ dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n");
+ return NULL;
+ }
+
+ source_port = ntohs(lan9303_tag[1]) & 0x3;
+
+ if (source_port >= LAN9303_MAX_PORTS) {
+ dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n");
+ return NULL;
+ }
+
+ if (!ds->ports[source_port].netdev) {
+ dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid netdev or device\n");
+ return NULL;
+ }
+
+ /* remove the special VLAN tag between the MAC addresses
+ * and the current ethertype field.
+ */
+ skb_pull_rcsum(skb, 2 + 2);
+ memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
+ 2 * ETH_ALEN);
+
+ /* forward the packet to the dedicated interface */
+ skb->dev = ds->ports[source_port].netdev;
+
+ return skb;
+}
+
+const struct dsa_device_ops lan9303_netdev_ops = {
+ .xmit = lan9303_xmit,
+ .rcv = lan9303_rcv,
+};
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
new file mode 100644
index 000000000000..837cdddb53f0
--- /dev/null
+++ b/net/dsa/tag_mtk.c
@@ -0,0 +1,100 @@
+/*
+ * Mediatek DSA Tag support
+ * Copyright (C) 2017 Landen Chao <landen.chao@mediatek.com>
+ * Sean Wang <sean.wang@mediatek.com>
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/etherdevice.h>
+#include <net/dsa.h>
+#include "dsa_priv.h"
+
+#define MTK_HDR_LEN 4
+#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
+#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0)
+
+static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ u8 *mtk_tag;
+
+ if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
+ goto out_free;
+
+ skb_push(skb, MTK_HDR_LEN);
+
+ memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
+
+ /* Build the tag after the MAC Source Address */
+ mtk_tag = skb->data + 2 * ETH_ALEN;
+ mtk_tag[0] = 0;
+ mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
+ mtk_tag[2] = 0;
+ mtk_tag[3] = 0;
+
+ return skb;
+
+out_free:
+ kfree_skb(skb);
+ return NULL;
+}
+
+static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
+{
+ struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_switch *ds;
+ int port;
+ __be16 *phdr, hdr;
+
+ if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN)))
+ goto out_drop;
+
+ /* The MTK header is added by the switch between src addr
+ * and ethertype at this point, skb->data points to 2 bytes
+ * after src addr so header should be 2 bytes right before.
+ */
+ phdr = (__be16 *)(skb->data - 2);
+ hdr = ntohs(*phdr);
+
+ /* Remove MTK tag and recalculate checksum. */
+ skb_pull_rcsum(skb, MTK_HDR_LEN);
+
+ memmove(skb->data - ETH_HLEN,
+ skb->data - ETH_HLEN - MTK_HDR_LEN,
+ 2 * ETH_ALEN);
+
+ /* This protocol doesn't support cascading multiple
+ * switches so it's safe to assume the switch is first
+ * in the tree.
+ */
+ ds = dst->ds[0];
+ if (!ds)
+ goto out_drop;
+
+ /* Get source port information */
+ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
+ if (!ds->ports[port].netdev)
+ goto out_drop;
+
+ skb->dev = ds->ports[port].netdev;
+
+ return skb;
+
+out_drop:
+ return NULL;
+}
+
+const struct dsa_device_ops mtk_netdev_ops = {
+ .xmit = mtk_tag_xmit,
+ .rcv = mtk_tag_rcv,
+};
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 30240f343aea..3ba3f59f7a34 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -12,6 +12,7 @@
*/
#include <linux/etherdevice.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
#define QCA_HDR_LEN 2
@@ -65,8 +66,9 @@ out_free:
return NULL;
}
-static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
@@ -74,13 +76,6 @@ static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
int port;
__be16 *phdr, hdr;
- if (unlikely(!dst))
- goto out_drop;
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (!skb)
- goto out;
-
if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
goto out_drop;
@@ -114,22 +109,12 @@ static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
goto out_drop;
/* Update skb & forward the frame accordingly */
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
skb->dev = ds->ports[port].netdev;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops qca_netdev_ops = {
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 26f977176978..aafc2fc74c30 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -11,6 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -57,22 +58,17 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
return nskb;
}
-static int trailer_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
u8 *trailer;
int source_port;
- if (unlikely(dst == NULL))
- goto out_drop;
ds = dst->cpu_switch;
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out;
-
if (skb_linearize(skb))
goto out_drop;
@@ -88,21 +84,11 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev,
pskb_trim_rcsum(skb, skb->len - 4);
skb->dev = ds->ports[source_port].netdev;
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops trailer_netdev_ops = {
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 1ab30e7d3f99..81dac16933fc 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -350,7 +350,7 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
return 0;
invalid:
- netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL);
+ netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL);
return 0;
nla_put_failure:
@@ -432,7 +432,7 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
return 0;
invalid:
- netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL);
+ netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL);
return 0;
nla_put_failure:
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index fc60cd061f39..99f6c254ea77 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -249,8 +249,7 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
if (!cb->args[0]) {
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
genl_family_attrbuf(&nl802154_fam),
- nl802154_fam.maxattr,
- nl802154_policy);
+ nl802154_fam.maxattr, nl802154_policy, NULL);
if (err)
goto out_unlock;
@@ -562,8 +561,8 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
struct nl802154_dump_wpan_phy_state *state)
{
struct nlattr **tb = genl_family_attrbuf(&nl802154_fam);
- int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
- tb, nl802154_fam.maxattr, nl802154_policy);
+ int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, tb,
+ nl802154_fam.maxattr, nl802154_policy, NULL);
/* TODO check if we can handle error here,
* we have no backward compatibility
@@ -1308,7 +1307,7 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla,
struct nlattr *attrs[NL802154_DEV_ADDR_ATTR_MAX + 1];
if (!nla || nla_parse_nested(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla,
- nl802154_dev_addr_policy))
+ nl802154_dev_addr_policy, NULL))
return -EINVAL;
if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] ||
@@ -1348,7 +1347,7 @@ ieee802154_llsec_parse_key_id(struct nlattr *nla,
struct nlattr *attrs[NL802154_KEY_ID_ATTR_MAX + 1];
if (!nla || nla_parse_nested(attrs, NL802154_KEY_ID_ATTR_MAX, nla,
- nl802154_key_id_policy))
+ nl802154_key_id_policy, NULL))
return -EINVAL;
if (!attrs[NL802154_KEY_ID_ATTR_MODE])
@@ -1565,7 +1564,7 @@ static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info)
if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_KEY],
- nl802154_key_policy))
+ nl802154_key_policy, info->extack))
return -EINVAL;
if (!attrs[NL802154_KEY_ATTR_USAGE_FRAMES] ||
@@ -1615,7 +1614,7 @@ static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info)
if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_KEY],
- nl802154_key_policy))
+ nl802154_key_policy, info->extack))
return -EINVAL;
if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0)
@@ -1729,8 +1728,8 @@ ieee802154_llsec_parse_device(struct nlattr *nla,
{
struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1];
- if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, nla,
- nl802154_dev_policy))
+ if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX,
+ nla, nl802154_dev_policy, NULL))
return -EINVAL;
memset(dev, 0, sizeof(*dev));
@@ -1783,7 +1782,7 @@ static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info)
if (nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_DEVICE],
- nl802154_dev_policy))
+ nl802154_dev_policy, info->extack))
return -EINVAL;
if (!attrs[NL802154_DEV_ATTR_EXTENDED_ADDR])
@@ -1911,7 +1910,7 @@ static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info
if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] ||
nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_DEVKEY],
- nl802154_devkey_policy) < 0)
+ nl802154_devkey_policy, info->extack) < 0)
return -EINVAL;
if (!attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER] ||
@@ -1943,7 +1942,7 @@ static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info
if (nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_DEVKEY],
- nl802154_devkey_policy))
+ nl802154_devkey_policy, info->extack))
return -EINVAL;
if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR])
@@ -2063,8 +2062,8 @@ llsec_parse_seclevel(struct nlattr *nla, struct ieee802154_llsec_seclevel *sl)
{
struct nlattr *attrs[NL802154_SECLEVEL_ATTR_MAX + 1];
- if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX, nla,
- nl802154_seclevel_policy))
+ if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX,
+ nla, nl802154_seclevel_policy, NULL))
return -EINVAL;
memset(sl, 0, sizeof(*sl));
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index c6d4238ff94a..f83de23a30e7 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_rate.o tcp_recovery.o \
tcp_offload.o datagram.o raw.o udp.o udplite.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
- fib_frontend.o fib_semantics.o fib_trie.o \
+ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6b1fc6e4278e..d1a11707a126 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1599,8 +1599,9 @@ static const struct net_protocol igmp_protocol = {
};
#endif
-static const struct net_protocol tcp_protocol = {
+static struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
+ .early_demux_handler = tcp_v4_early_demux,
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
@@ -1608,8 +1609,9 @@ static const struct net_protocol tcp_protocol = {
.icmp_strict_tag_validation = 1,
};
-static const struct net_protocol udp_protocol = {
+static struct net_protocol udp_protocol = {
.early_demux = udp_v4_early_demux,
+ .early_demux_handler = udp_v4_early_demux,
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
@@ -1720,6 +1722,8 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
net->ipv4.sysctl_ip_dynaddr = 0;
net->ipv4.sysctl_ip_early_demux = 1;
+ net->ipv4.sysctl_udp_early_demux = 1;
+ net->ipv4.sysctl_tcp_early_demux = 1;
#ifdef CONFIG_SYSCTL
net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
#endif
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 51b27ae09fbd..0937b34c27ca 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -872,7 +872,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->pkt_type != PACKET_HOST)
state = NUD_STALE;
neigh_update(n, sha, state,
- override ? NEIGH_UPDATE_F_OVERRIDE : 0);
+ override ? NEIGH_UPDATE_F_OVERRIDE : 0, 0);
neigh_release(n);
}
@@ -1033,7 +1033,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
r->arp_ha.sa_data : NULL, state,
NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN, 0);
neigh_release(neigh);
}
return err;
@@ -1084,7 +1084,7 @@ static int arp_invalidate(struct net_device *dev, __be32 ip)
if (neigh->nud_state & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN, 0);
neigh_release(neigh);
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cebedd545e5e..df14815a3b8c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -571,7 +571,8 @@ static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
return ret;
}
-static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
@@ -582,7 +583,8 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
+ extack);
if (err < 0)
goto errout;
@@ -752,7 +754,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
struct in_device *in_dev;
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
+ NULL);
if (err < 0)
goto errout;
@@ -843,7 +846,8 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
return NULL;
}
-static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct in_ifaddr *ifa;
@@ -1192,6 +1196,18 @@ out:
return done;
}
+static __be32 in_dev_select_addr(const struct in_device *in_dev,
+ int scope)
+{
+ for_primary_ifa(in_dev) {
+ if (ifa->ifa_scope != RT_SCOPE_LINK &&
+ ifa->ifa_scope <= scope)
+ return ifa->ifa_local;
+ } endfor_ifa(in_dev);
+
+ return 0;
+}
+
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
__be32 addr = 0;
@@ -1228,13 +1244,9 @@ no_in_dev:
if (master_idx &&
(dev = dev_get_by_index_rcu(net, master_idx)) &&
(in_dev = __in_dev_get_rcu(dev))) {
- for_primary_ifa(in_dev) {
- if (ifa->ifa_scope != RT_SCOPE_LINK &&
- ifa->ifa_scope <= scope) {
- addr = ifa->ifa_local;
- goto out_unlock;
- }
- } endfor_ifa(in_dev);
+ addr = in_dev_select_addr(in_dev, scope);
+ if (addr)
+ goto out_unlock;
}
/* Not loopback addresses on loopback should be preferred
@@ -1249,13 +1261,9 @@ no_in_dev:
if (!in_dev)
continue;
- for_primary_ifa(in_dev) {
- if (ifa->ifa_scope != RT_SCOPE_LINK &&
- ifa->ifa_scope <= scope) {
- addr = ifa->ifa_local;
- goto out_unlock;
- }
- } endfor_ifa(in_dev);
+ addr = in_dev_select_addr(in_dev, scope);
+ if (addr)
+ goto out_unlock;
}
out_unlock:
rcu_read_unlock();
@@ -1713,7 +1721,7 @@ static int inet_validate_link_af(const struct net_device *dev,
if (dev && !__in_dev_get_rtnl(dev))
return -EAFNOSUPPORT;
- err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
+ err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
if (err < 0)
return err;
@@ -1741,7 +1749,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
if (!in_dev)
return -EAFNOSUPPORT;
- if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
+ if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
BUG();
if (tb[IFLA_INET_CONF]) {
@@ -1798,6 +1806,9 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
goto nla_put_failure;
+ if (!devconf)
+ goto out;
+
if ((all || type == NETCONFA_FORWARDING) &&
nla_put_s32(skb, NETCONFA_FORWARDING,
IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
@@ -1819,6 +1830,7 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
goto nla_put_failure;
+out:
nlmsg_end(skb, nlh);
return 0;
@@ -1827,8 +1839,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
- struct ipv4_devconf *devconf)
+void inet_netconf_notify_devconf(struct net *net, int event, int type,
+ int ifindex, struct ipv4_devconf *devconf)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -1838,7 +1850,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
goto errout;
err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
- RTM_NEWNETCONF, 0, type);
+ event, 0, type);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
WARN_ON(err == -EMSGSIZE);
@@ -1861,7 +1873,8 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
};
static int inet_netconf_get_devconf(struct sk_buff *in_skb,
- struct nlmsghdr *nlh)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[NETCONFA_MAX+1];
@@ -1874,7 +1887,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
int err;
err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
- devconf_ipv4_policy);
+ devconf_ipv4_policy, extack);
if (err < 0)
goto errout;
@@ -2017,10 +2030,12 @@ static void inet_forward_change(struct net *net)
IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
- inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
- inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv4.devconf_dflt);
@@ -2033,7 +2048,8 @@ static void inet_forward_change(struct net *net)
in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
- inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
dev->ifindex, &in_dev->cnf);
}
}
@@ -2078,19 +2094,22 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
new_value != old_value) {
ifindex = devinet_conf_ifindex(net, cnf);
- inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_RP_FILTER,
ifindex, cnf);
}
if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
new_value != old_value) {
ifindex = devinet_conf_ifindex(net, cnf);
- inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_PROXY_NEIGH,
ifindex, cnf);
}
if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
new_value != old_value) {
ifindex = devinet_conf_ifindex(net, cnf);
- inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
ifindex, cnf);
}
}
@@ -2125,7 +2144,7 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
container_of(cnf, struct in_device, cnf);
if (*valp)
dev_disable_lro(idev->dev);
- inet_netconf_notify_devconf(net,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_FORWARDING,
idev->dev->ifindex,
cnf);
@@ -2133,7 +2152,8 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
rtnl_unlock();
rt_cache_flush(net);
} else
- inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv4.devconf_dflt);
}
@@ -2255,7 +2275,8 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
p->sysctl = t;
- inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
+ ifindex, p);
return 0;
free:
@@ -2264,16 +2285,18 @@ out:
return -ENOBUFS;
}
-static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
+static void __devinet_sysctl_unregister(struct net *net,
+ struct ipv4_devconf *cnf, int ifindex)
{
struct devinet_sysctl_table *t = cnf->sysctl;
- if (!t)
- return;
+ if (t) {
+ cnf->sysctl = NULL;
+ unregister_net_sysctl_table(t->sysctl_header);
+ kfree(t);
+ }
- cnf->sysctl = NULL;
- unregister_net_sysctl_table(t->sysctl_header);
- kfree(t);
+ inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
}
static int devinet_sysctl_register(struct in_device *idev)
@@ -2295,7 +2318,9 @@ static int devinet_sysctl_register(struct in_device *idev)
static void devinet_sysctl_unregister(struct in_device *idev)
{
- __devinet_sysctl_unregister(&idev->cnf);
+ struct net *net = dev_net(idev->dev);
+
+ __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
neigh_sysctl_unregister(idev->arp_parms);
}
@@ -2370,9 +2395,9 @@ static __net_init int devinet_init_net(struct net *net)
#ifdef CONFIG_SYSCTL
err_reg_ctl:
- __devinet_sysctl_unregister(dflt);
+ __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
- __devinet_sysctl_unregister(all);
+ __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
if (tbl != ctl_forward_entry)
kfree(tbl);
@@ -2394,8 +2419,10 @@ static __net_exit void devinet_exit_net(struct net *net)
tbl = net->ipv4.forw_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.forw_hdr);
- __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
- __devinet_sysctl_unregister(net->ipv4.devconf_all);
+ __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
+ NETCONFA_IFINDEX_DEFAULT);
+ __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
+ NETCONFA_IFINDEX_ALL);
kfree(tbl);
#endif
kfree(net->ipv4.devconf_dflt);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b1e24446e297..7e501adb5042 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -152,21 +152,28 @@ static void esp_output_restore_header(struct sk_buff *skb)
}
static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb,
+ struct xfrm_state *x,
struct ip_esp_hdr *esph,
struct esp_output_extra *extra)
{
- struct xfrm_state *x = skb_dst(skb)->xfrm;
-
/* For ESN we move the header forward by 4 bytes to
* accomodate the high bits. We will move it back after
* encryption.
*/
if ((x->props.flags & XFRM_STATE_ESN)) {
+ __u32 seqhi;
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (xo)
+ seqhi = xo->seq.hi;
+ else
+ seqhi = XFRM_SKB_CB(skb)->seq.output.hi;
+
extra->esphoff = (unsigned char *)esph -
skb_transport_header(skb);
esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
extra->seqhi = esph->spi;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ esph->seq_no = htonl(seqhi);
}
esph->spi = x->id.spi;
@@ -198,98 +205,56 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
tail[plen - 1] = proto;
}
-static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
+static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
{
- struct esp_output_extra *extra;
- int err = -ENOMEM;
- struct ip_esp_hdr *esph;
- struct crypto_aead *aead;
- struct aead_request *req;
- struct scatterlist *sg, *dsg;
- struct sk_buff *trailer;
- struct page *page;
- void *tmp;
- u8 *iv;
- u8 *tail;
- u8 *vaddr;
- int blksize;
- int clen;
- int alen;
- int plen;
- int ivlen;
- int tfclen;
- int nfrags;
- int assoclen;
- int extralen;
- int tailen;
- __be64 seqno;
- __u8 proto = *skb_mac_header(skb);
-
- /* skb is pure payload to encrypt */
-
- aead = x->data;
- alen = crypto_aead_authsize(aead);
- ivlen = crypto_aead_ivsize(aead);
-
- tfclen = 0;
- if (x->tfcpad) {
- struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
- u32 padto;
-
- padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached));
- if (skb->len < padto)
- tfclen = padto - skb->len;
+ int encap_type;
+ struct udphdr *uh;
+ __be32 *udpdata32;
+ __be16 sport, dport;
+ struct xfrm_encap_tmpl *encap = x->encap;
+ struct ip_esp_hdr *esph = esp->esph;
+
+ spin_lock_bh(&x->lock);
+ sport = encap->encap_sport;
+ dport = encap->encap_dport;
+ encap_type = encap->encap_type;
+ spin_unlock_bh(&x->lock);
+
+ uh = (struct udphdr *)esph;
+ uh->source = sport;
+ uh->dest = dport;
+ uh->len = htons(skb->len + esp->tailen
+ - skb_transport_offset(skb));
+ uh->check = 0;
+
+ switch (encap_type) {
+ default:
+ case UDP_ENCAP_ESPINUDP:
+ esph = (struct ip_esp_hdr *)(uh + 1);
+ break;
+ case UDP_ENCAP_ESPINUDP_NON_IKE:
+ udpdata32 = (__be32 *)(uh + 1);
+ udpdata32[0] = udpdata32[1] = 0;
+ esph = (struct ip_esp_hdr *)(udpdata32 + 2);
+ break;
}
- blksize = ALIGN(crypto_aead_blocksize(aead), 4);
- clen = ALIGN(skb->len + 2 + tfclen, blksize);
- plen = clen - skb->len - tfclen;
- tailen = tfclen + plen + alen;
- assoclen = sizeof(*esph);
- extralen = 0;
- if (x->props.flags & XFRM_STATE_ESN) {
- extralen += sizeof(*extra);
- assoclen += sizeof(__be32);
- }
+ *skb_mac_header(skb) = IPPROTO_UDP;
+ esp->esph = esph;
+}
- *skb_mac_header(skb) = IPPROTO_ESP;
- esph = ip_esp_hdr(skb);
+int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
+{
+ u8 *tail;
+ u8 *vaddr;
+ int nfrags;
+ struct page *page;
+ struct sk_buff *trailer;
+ int tailen = esp->tailen;
/* this is non-NULL only with UDP Encapsulation */
- if (x->encap) {
- struct xfrm_encap_tmpl *encap = x->encap;
- struct udphdr *uh;
- __be32 *udpdata32;
- __be16 sport, dport;
- int encap_type;
-
- spin_lock_bh(&x->lock);
- sport = encap->encap_sport;
- dport = encap->encap_dport;
- encap_type = encap->encap_type;
- spin_unlock_bh(&x->lock);
-
- uh = (struct udphdr *)esph;
- uh->source = sport;
- uh->dest = dport;
- uh->len = htons(skb->len + tailen
- - skb_transport_offset(skb));
- uh->check = 0;
-
- switch (encap_type) {
- default:
- case UDP_ENCAP_ESPINUDP:
- esph = (struct ip_esp_hdr *)(uh + 1);
- break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- udpdata32 = (__be32 *)(uh + 1);
- udpdata32[0] = udpdata32[1] = 0;
- esph = (struct ip_esp_hdr *)(udpdata32 + 2);
- break;
- }
-
- *skb_mac_header(skb) = IPPROTO_UDP;
- }
+ if (x->encap)
+ esp_output_udp_encap(x, skb, esp);
if (!skb_cloned(skb)) {
if (tailen <= skb_availroom(skb)) {
@@ -304,6 +269,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct sock *sk = skb->sk;
struct page_frag *pfrag = &x->xfrag;
+ esp->inplace = false;
+
allocsize = ALIGN(tailen, L1_CACHE_BYTES);
spin_lock_bh(&x->lock);
@@ -320,10 +287,12 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
tail = vaddr + pfrag->offset;
- esp_output_fill_trailer(tail, tfclen, plen, proto);
+ esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
kunmap_atomic(vaddr);
+ spin_unlock_bh(&x->lock);
+
nfrags = skb_shinfo(skb)->nr_frags;
__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -339,76 +308,56 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
if (sk)
atomic_add(tailen, &sk->sk_wmem_alloc);
- skb_push(skb, -skb_network_offset(skb));
-
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- esph->spi = x->id.spi;
-
- tmp = esp_alloc_tmp(aead, nfrags + 2, extralen);
- if (!tmp) {
- spin_unlock_bh(&x->lock);
- err = -ENOMEM;
- goto error;
- }
-
- extra = esp_tmp_extra(tmp);
- iv = esp_tmp_iv(aead, tmp, extralen);
- req = esp_tmp_req(aead, iv);
- sg = esp_req_sg(aead, req);
- dsg = &sg[nfrags];
-
- esph = esp_output_set_extra(skb, esph, extra);
-
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg,
- (unsigned char *)esph - skb->data,
- assoclen + ivlen + clen + alen);
-
- allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
-
- if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
- spin_unlock_bh(&x->lock);
- err = -ENOMEM;
- goto error;
- }
-
- skb_shinfo(skb)->nr_frags = 1;
-
- page = pfrag->page;
- get_page(page);
- /* replace page frags in skb with new page */
- __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
- pfrag->offset = pfrag->offset + allocsize;
-
- sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
- skb_to_sgvec(skb, dsg,
- (unsigned char *)esph - skb->data,
- assoclen + ivlen + clen + alen);
-
- spin_unlock_bh(&x->lock);
-
- goto skip_cow2;
+ goto out;
}
}
cow:
- err = skb_cow_data(skb, tailen, &trailer);
- if (err < 0)
- goto error;
- nfrags = err;
+ nfrags = skb_cow_data(skb, tailen, &trailer);
+ if (nfrags < 0)
+ goto out;
tail = skb_tail_pointer(trailer);
- esph = ip_esp_hdr(skb);
skip_cow:
- esp_output_fill_trailer(tail, tfclen, plen, proto);
+ esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
+ pskb_put(skb, trailer, tailen);
- pskb_put(skb, trailer, clen - skb->len + alen);
- skb_push(skb, -skb_network_offset(skb));
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- esph->spi = x->id.spi;
+out:
+ return nfrags;
+}
+EXPORT_SYMBOL_GPL(esp_output_head);
- tmp = esp_alloc_tmp(aead, nfrags, extralen);
+int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
+{
+ u8 *iv;
+ int alen;
+ void *tmp;
+ int ivlen;
+ int assoclen;
+ int extralen;
+ struct page *page;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct aead_request *req;
+ struct scatterlist *sg, *dsg;
+ struct esp_output_extra *extra;
+ int err = -ENOMEM;
+
+ assoclen = sizeof(struct ip_esp_hdr);
+ extralen = 0;
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ extralen += sizeof(*extra);
+ assoclen += sizeof(__be32);
+ }
+
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+ ivlen = crypto_aead_ivsize(aead);
+
+ tmp = esp_alloc_tmp(aead, esp->nfrags + 2, extralen);
if (!tmp) {
+ spin_unlock_bh(&x->lock);
err = -ENOMEM;
goto error;
}
@@ -417,29 +366,58 @@ skip_cow:
iv = esp_tmp_iv(aead, tmp, extralen);
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
- dsg = sg;
- esph = esp_output_set_extra(skb, esph, extra);
+ if (esp->inplace)
+ dsg = sg;
+ else
+ dsg = &sg[esp->nfrags];
- sg_init_table(sg, nfrags);
+ esph = esp_output_set_extra(skb, x, esp->esph, extra);
+ esp->esph = esph;
+
+ sg_init_table(sg, esp->nfrags);
skb_to_sgvec(skb, sg,
(unsigned char *)esph - skb->data,
- assoclen + ivlen + clen + alen);
+ assoclen + ivlen + esp->clen + alen);
+
+ if (!esp->inplace) {
+ int allocsize;
+ struct page_frag *pfrag = &x->xfrag;
+
+ allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
+
+ spin_lock_bh(&x->lock);
+ if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+ spin_unlock_bh(&x->lock);
+ err = -ENOMEM;
+ goto error;
+ }
+
+ skb_shinfo(skb)->nr_frags = 1;
+
+ page = pfrag->page;
+ get_page(page);
+ /* replace page frags in skb with new page */
+ __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
+ pfrag->offset = pfrag->offset + allocsize;
+ spin_unlock_bh(&x->lock);
+
+ sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
+ skb_to_sgvec(skb, dsg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + esp->clen + alen);
+ }
-skip_cow2:
if ((x->props.flags & XFRM_STATE_ESN))
aead_request_set_callback(req, 0, esp_output_done_esn, skb);
else
aead_request_set_callback(req, 0, esp_output_done, skb);
- aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv);
+ aead_request_set_crypt(req, sg, dsg, ivlen + esp->clen, iv);
aead_request_set_ad(req, assoclen);
- seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
- ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
-
memset(iv, 0, ivlen);
- memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8),
+ memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&esp->seqno + 8 - min(ivlen, 8),
min(ivlen, 8));
ESP_SKB_CB(skb)->tmp = tmp;
@@ -465,11 +443,63 @@ skip_cow2:
error:
return err;
}
+EXPORT_SYMBOL_GPL(esp_output_tail);
-static int esp_input_done2(struct sk_buff *skb, int err)
+static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int alen;
+ int blksize;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct esp_info esp;
+
+ esp.inplace = true;
+
+ esp.proto = *skb_mac_header(skb);
+ *skb_mac_header(skb) = IPPROTO_ESP;
+
+ /* skb is pure payload to encrypt */
+
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+
+ esp.tfclen = 0;
+ if (x->tfcpad) {
+ struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+ u32 padto;
+
+ padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached));
+ if (skb->len < padto)
+ esp.tfclen = padto - skb->len;
+ }
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize);
+ esp.plen = esp.clen - skb->len - esp.tfclen;
+ esp.tailen = esp.tfclen + esp.plen + alen;
+
+ esp.esph = ip_esp_hdr(skb);
+
+ esp.nfrags = esp_output_head(x, skb, &esp);
+ if (esp.nfrags < 0)
+ return esp.nfrags;
+
+ esph = esp.esph;
+ esph->spi = x->id.spi;
+
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ esp.seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
+ ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
+
+ skb_push(skb, -skb_network_offset(skb));
+
+ return esp_output_tail(x, skb, &esp);
+}
+
+int esp_input_done2(struct sk_buff *skb, int err)
{
const struct iphdr *iph;
struct xfrm_state *x = xfrm_input_state(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
int alen = crypto_aead_authsize(aead);
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
@@ -478,7 +508,8 @@ static int esp_input_done2(struct sk_buff *skb, int err)
u8 nexthdr[2];
int padlen;
- kfree(ESP_SKB_CB(skb)->tmp);
+ if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
+ kfree(ESP_SKB_CB(skb)->tmp);
if (unlikely(err))
goto out;
@@ -549,6 +580,7 @@ static int esp_input_done2(struct sk_buff *skb, int err)
out:
return err;
}
+EXPORT_SYMBOL_GPL(esp_input_done2);
static void esp_input_done(struct crypto_async_request *base, int err)
{
@@ -751,13 +783,17 @@ static int esp_init_aead(struct xfrm_state *x)
char aead_name[CRYPTO_MAX_ALG_NAME];
struct crypto_aead *aead;
int err;
+ u32 mask = 0;
err = -ENAMETOOLONG;
if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
goto error;
- aead = crypto_alloc_aead(aead_name, 0, 0);
+ if (x->xso.offload_handle)
+ mask |= CRYPTO_ALG_ASYNC;
+
+ aead = crypto_alloc_aead(aead_name, 0, mask);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -787,6 +823,7 @@ static int esp_init_authenc(struct xfrm_state *x)
char authenc_name[CRYPTO_MAX_ALG_NAME];
unsigned int keylen;
int err;
+ u32 mask = 0;
err = -EINVAL;
if (!x->ealg)
@@ -812,7 +849,10 @@ static int esp_init_authenc(struct xfrm_state *x)
goto error;
}
- aead = crypto_alloc_aead(authenc_name, 0, 0);
+ if (x->xso.offload_handle)
+ mask |= CRYPTO_ALG_ASYNC;
+
+ aead = crypto_alloc_aead(authenc_name, 0, mask);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -931,7 +971,7 @@ static const struct xfrm_type esp_type =
.destructor = esp_destroy,
.get_mtu = esp4_get_mtu,
.input = esp_input,
- .output = esp_output
+ .output = esp_output,
};
static struct xfrm4_protocol esp4_protocol = {
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 1de442632406..e0666016a764 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -43,27 +43,31 @@ static struct sk_buff **esp4_gro_receive(struct sk_buff **head,
if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
goto out;
- err = secpath_set(skb);
- if (err)
- goto out;
+ xo = xfrm_offload(skb);
+ if (!xo || !(xo->flags & CRYPTO_DONE)) {
+ err = secpath_set(skb);
+ if (err)
+ goto out;
- if (skb->sp->len == XFRM_MAX_DEPTH)
- goto out;
+ if (skb->sp->len == XFRM_MAX_DEPTH)
+ goto out;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ip_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET);
- if (!x)
- goto out;
+ x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ip_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET);
+ if (!x)
+ goto out;
- skb->sp->xvec[skb->sp->len++] = x;
- skb->sp->olen++;
+ skb->sp->xvec[skb->sp->len++] = x;
+ skb->sp->olen++;
- xo = xfrm_offload(skb);
- if (!xo) {
- xfrm_state_put(x);
- goto out;
+ xo = xfrm_offload(skb);
+ if (!xo) {
+ xfrm_state_put(x);
+ goto out;
+ }
}
+
xo->flags |= XFRM_GRO;
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
@@ -84,19 +88,214 @@ out:
return NULL;
}
+static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ip_esp_hdr *esph;
+ struct iphdr *iph = ip_hdr(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ int proto = iph->protocol;
+
+ skb_push(skb, -skb_network_offset(skb));
+ esph = ip_esp_hdr(skb);
+ *skb_mac_header(skb) = IPPROTO_ESP;
+
+ esph->spi = x->id.spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+
+ xo->proto = proto;
+}
+
+static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ __u32 seq;
+ int err = 0;
+ struct sk_buff *skb2;
+ struct xfrm_state *x;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ netdev_features_t esp_features = features;
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (!xo)
+ goto out;
+
+ seq = xo->seq.low;
+
+ x = skb->sp->xvec[skb->sp->len - 1];
+ aead = x->data;
+ esph = ip_esp_hdr(skb);
+
+ if (esph->spi != x->id.spi)
+ goto out;
+
+ if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
+ goto out;
+
+ __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
+
+ skb->encap_hdr_csum = 1;
+
+ if (!(features & NETIF_F_HW_ESP))
+ esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+
+ segs = x->outer_mode->gso_segment(x, skb, esp_features);
+ if (IS_ERR_OR_NULL(segs))
+ goto out;
+
+ __skb_pull(skb, skb->data - skb_mac_header(skb));
+
+ skb2 = segs;
+ do {
+ struct sk_buff *nskb = skb2->next;
+
+ xo = xfrm_offload(skb2);
+ xo->flags |= XFRM_GSO_SEGMENT;
+ xo->seq.low = seq;
+ xo->seq.hi = xfrm_replay_seqhi(x, seq);
+
+ if(!(features & NETIF_F_HW_ESP))
+ xo->flags |= CRYPTO_FALLBACK;
+
+ x->outer_mode->xmit(x, skb2);
+
+ err = x->type_offload->xmit(x, skb2, esp_features);
+ if (err) {
+ kfree_skb_list(segs);
+ return ERR_PTR(err);
+ }
+
+ if (!skb_is_gso(skb2))
+ seq++;
+ else
+ seq += skb_shinfo(skb2)->gso_segs;
+
+ skb_push(skb2, skb2->mac_len);
+ skb2 = nskb;
+ } while (skb2);
+
+out:
+ return segs;
+}
+
+static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct crypto_aead *aead = x->data;
+
+ if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
+ return -EINVAL;
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ return esp_input_done2(skb, 0);
+}
+
+static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features)
+{
+ int err;
+ int alen;
+ int blksize;
+ struct xfrm_offload *xo;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct esp_info esp;
+ bool hw_offload = true;
+
+ esp.inplace = true;
+
+ xo = xfrm_offload(skb);
+
+ if (!xo)
+ return -EINVAL;
+
+ if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
+ (x->xso.dev != skb->dev)) {
+ xo->flags |= CRYPTO_FALLBACK;
+ hw_offload = false;
+ }
+
+ esp.proto = xo->proto;
+
+ /* skb is pure payload to encrypt */
+
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+
+ esp.tfclen = 0;
+ /* XXX: Add support for tfc padding here. */
+
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize);
+ esp.plen = esp.clen - skb->len - esp.tfclen;
+ esp.tailen = esp.tfclen + esp.plen + alen;
+
+ esp.esph = ip_esp_hdr(skb);
+
+
+ if (!hw_offload || (hw_offload && !skb_is_gso(skb))) {
+ esp.nfrags = esp_output_head(x, skb, &esp);
+ if (esp.nfrags < 0)
+ return esp.nfrags;
+ }
+
+ esph = esp.esph;
+ esph->spi = x->id.spi;
+
+ skb_push(skb, -skb_network_offset(skb));
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ esph->seq_no = htonl(xo->seq.low);
+ } else {
+ ip_hdr(skb)->tot_len = htons(skb->len);
+ ip_send_check(ip_hdr(skb));
+ }
+
+ if (hw_offload)
+ return 0;
+
+ esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+
+ err = esp_output_tail(x, skb, &esp);
+ if (err < 0)
+ return err;
+
+ secpath_reset(skb);
+
+ return 0;
+}
+
static const struct net_offload esp4_offload = {
.callbacks = {
.gro_receive = esp4_gro_receive,
+ .gso_segment = esp4_gso_segment,
},
};
+static const struct xfrm_type_offload esp_type_offload = {
+ .description = "ESP4 OFFLOAD",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_ESP,
+ .input_tail = esp_input_tail,
+ .xmit = esp_xmit,
+ .encap = esp4_gso_encap,
+};
+
static int __init esp4_offload_init(void)
{
+ if (xfrm_register_type_offload(&esp_type_offload, AF_INET) < 0) {
+ pr_info("%s: can't add xfrm type offload\n", __func__);
+ return -EAGAIN;
+ }
+
return inet_add_offload(&esp4_offload, IPPROTO_ESP);
}
static void __exit esp4_offload_exit(void)
{
+ if (xfrm_unregister_type_offload(&esp_type_offload, AF_INET) < 0)
+ pr_info("%s: can't remove xfrm type offload\n", __func__);
+
inet_del_offload(&esp4_offload, IPPROTO_ESP);
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8f2133ffc2ff..5a0e456b5d58 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -632,7 +632,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
int err, remaining;
struct rtmsg *rtm;
- err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
+ err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy,
+ NULL);
if (err < 0)
goto errout;
@@ -709,7 +710,8 @@ errout:
return err;
}
-static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct fib_config cfg;
@@ -731,7 +733,8 @@ errout:
return err;
}
-static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct fib_config cfg;
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
new file mode 100644
index 000000000000..e0714d975947
--- /dev/null
+++ b/net/ipv4/fib_notifier.c
@@ -0,0 +1,86 @@
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <net/net_namespace.h>
+#include <net/netns/ipv4.h>
+#include <net/ip_fib.h>
+
+static ATOMIC_NOTIFIER_HEAD(fib_chain);
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->net = net;
+ return nb->notifier_call(nb, event_type, info);
+}
+
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ net->ipv4.fib_seq++;
+ info->net = net;
+ return atomic_notifier_call_chain(&fib_chain, event_type, info);
+}
+
+static unsigned int fib_seq_sum(void)
+{
+ unsigned int fib_seq = 0;
+ struct net *net;
+
+ rtnl_lock();
+ for_each_net(net)
+ fib_seq += net->ipv4.fib_seq;
+ rtnl_unlock();
+
+ return fib_seq;
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb),
+ unsigned int fib_seq)
+{
+ atomic_notifier_chain_register(&fib_chain, nb);
+ if (fib_seq == fib_seq_sum())
+ return true;
+ atomic_notifier_chain_unregister(&fib_chain, nb);
+ if (cb)
+ cb(nb);
+ return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb))
+{
+ int retries = 0;
+
+ do {
+ unsigned int fib_seq = fib_seq_sum();
+ struct net *net;
+
+ /* Mutex semantics guarantee that every change done to
+ * FIB tries before we read the change sequence counter
+ * is now visible to us.
+ */
+ rcu_read_lock();
+ for_each_net_rcu(net) {
+ fib_rules_notify(net, nb);
+ fib_notify(net, nb);
+ }
+ rcu_read_unlock();
+
+ if (fib_dump_is_consistent(nb, cb, fib_seq))
+ return 0;
+ } while (++retries < FIB_DUMP_MAX_RETRIES);
+
+ return -EBUSY;
+}
+EXPORT_SYMBOL(register_fib_notifier);
+
+int unregister_fib_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&fib_chain, nb);
+}
+EXPORT_SYMBOL(unregister_fib_notifier);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 2e50062f642d..778ecf977eb2 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -47,6 +47,27 @@ struct fib4_rule {
#endif
};
+static bool fib4_rule_matchall(const struct fib_rule *rule)
+{
+ struct fib4_rule *r = container_of(rule, struct fib4_rule, common);
+
+ if (r->dst_len || r->src_len || r->tos)
+ return false;
+ return fib_rule_matchall(rule);
+}
+
+bool fib4_rule_default(const struct fib_rule *rule)
+{
+ if (!fib4_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+ rule->l3mdev)
+ return false;
+ if (rule->table != RT_TABLE_LOCAL && rule->table != RT_TABLE_MAIN &&
+ rule->table != RT_TABLE_DEFAULT)
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fib4_rule_default);
+
int __fib_lookup(struct net *net, struct flowi4 *flp,
struct fib_result *res, unsigned int flags)
{
@@ -164,12 +185,36 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}
+static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_rule *rule)
+{
+ struct fib_rule_notifier_info info = {
+ .rule = rule,
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
static int call_fib_rule_notifiers(struct net *net,
- enum fib_event_type event_type)
+ enum fib_event_type event_type,
+ struct fib_rule *rule)
+{
+ struct fib_rule_notifier_info info = {
+ .rule = rule,
+ };
+
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
+/* Called with rcu_read_lock() */
+void fib_rules_notify(struct net *net, struct notifier_block *nb)
{
- struct fib_notifier_info info;
+ struct fib_rules_ops *ops = net->ipv4.rules_ops;
+ struct fib_rule *rule;
- return call_fib_notifiers(net, event_type, &info);
+ list_for_each_entry_rcu(rule, &ops->rules_list, list)
+ call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
}
static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
@@ -228,7 +273,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD);
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
err = 0;
errout:
@@ -250,7 +295,7 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL);
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
errout:
return err;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 317026a39cfa..da449ddb8cc1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -57,7 +57,6 @@ static unsigned int fib_info_cnt;
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-u32 fib_multipath_secret __read_mostly;
#define for_nexthops(fi) { \
int nhsel; const struct fib_nh *nh; \
@@ -576,9 +575,6 @@ static void fib_rebalance(struct fib_info *fi)
atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
} endfor_nexthops(fi);
-
- net_get_random_once(&fib_multipath_secret,
- sizeof(fib_multipath_secret));
}
static inline void fib_add_weight(struct fib_info *fi,
@@ -1641,7 +1637,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
#endif
void fib_select_path(struct net *net, struct fib_result *res,
- struct flowi4 *fl4, int mp_hash)
+ struct flowi4 *fl4, const struct sk_buff *skb)
{
bool oif_check;
@@ -1650,10 +1646,9 @@ void fib_select_path(struct net *net, struct fib_result *res,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi->fib_nhs > 1 && oif_check) {
- if (mp_hash < 0)
- mp_hash = get_hash_from_flowi4(fl4) >> 1;
+ int h = fib_multipath_hash(res->fi, fl4, skb);
- fib_select_multipath(res, mp_hash);
+ fib_select_multipath(res, h);
}
else
#endif
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2f0d8233950f..1201409ba1dc 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -84,43 +84,6 @@
#include <trace/events/fib.h>
#include "fib_lookup.h"
-static unsigned int fib_seq_sum(void)
-{
- unsigned int fib_seq = 0;
- struct net *net;
-
- rtnl_lock();
- for_each_net(net)
- fib_seq += net->ipv4.fib_seq;
- rtnl_unlock();
-
- return fib_seq;
-}
-
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-static int call_fib_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_notifier_info *info)
-{
- info->net = net;
- return nb->notifier_call(nb, event_type, info);
-}
-
-static void fib_rules_notify(struct net *net, struct notifier_block *nb,
- enum fib_event_type event_type)
-{
-#ifdef CONFIG_IP_MULTIPLE_TABLES
- struct fib_notifier_info info;
-
- if (net->ipv4.fib_has_custom_rules)
- call_fib_notifier(nb, net, event_type, &info);
-#endif
-}
-
-static void fib_notify(struct net *net, struct notifier_block *nb,
- enum fib_event_type event_type);
-
static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type, u32 dst,
int dst_len, struct fib_info *fi,
@@ -137,62 +100,6 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
return call_fib_notifier(nb, net, event_type, &info.info);
}
-static bool fib_dump_is_consistent(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb),
- unsigned int fib_seq)
-{
- atomic_notifier_chain_register(&fib_chain, nb);
- if (fib_seq == fib_seq_sum())
- return true;
- atomic_notifier_chain_unregister(&fib_chain, nb);
- if (cb)
- cb(nb);
- return false;
-}
-
-#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb))
-{
- int retries = 0;
-
- do {
- unsigned int fib_seq = fib_seq_sum();
- struct net *net;
-
- /* Mutex semantics guarantee that every change done to
- * FIB tries before we read the change sequence counter
- * is now visible to us.
- */
- rcu_read_lock();
- for_each_net_rcu(net) {
- fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD);
- fib_notify(net, nb, FIB_EVENT_ENTRY_ADD);
- }
- rcu_read_unlock();
-
- if (fib_dump_is_consistent(nb, cb, fib_seq))
- return 0;
- } while (++retries < FIB_DUMP_MAX_RETRIES);
-
- return -EBUSY;
-}
-EXPORT_SYMBOL(register_fib_notifier);
-
-int unregister_fib_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&fib_chain, nb);
-}
-EXPORT_SYMBOL(unregister_fib_notifier);
-
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
- struct fib_notifier_info *info)
-{
- net->ipv4.fib_seq++;
- info->net = net;
- return atomic_notifier_call_chain(&fib_chain, event_type, info);
-}
-
static int call_fib_entry_notifiers(struct net *net,
enum fib_event_type event_type, u32 dst,
int dst_len, struct fib_info *fi,
@@ -1995,8 +1902,7 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
}
static void fib_leaf_notify(struct net *net, struct key_vector *l,
- struct fib_table *tb, struct notifier_block *nb,
- enum fib_event_type event_type)
+ struct fib_table *tb, struct notifier_block *nb)
{
struct fib_alias *fa;
@@ -2012,22 +1918,21 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
if (tb->tb_id != fa->tb_id)
continue;
- call_fib_entry_notifier(nb, net, event_type, l->key,
+ call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
fa->fa_type, fa->tb_id);
}
}
static void fib_table_notify(struct net *net, struct fib_table *tb,
- struct notifier_block *nb,
- enum fib_event_type event_type)
+ struct notifier_block *nb)
{
struct trie *t = (struct trie *)tb->tb_data;
struct key_vector *l, *tp = t->kv;
t_key key = 0;
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
- fib_leaf_notify(net, l, tb, nb, event_type);
+ fib_leaf_notify(net, l, tb, nb);
key = l->key + 1;
/* stop in case of wrap around */
@@ -2036,8 +1941,7 @@ static void fib_table_notify(struct net *net, struct fib_table *tb,
}
}
-static void fib_notify(struct net *net, struct notifier_block *nb,
- enum fib_event_type event_type)
+void fib_notify(struct net *net, struct notifier_block *nb)
{
unsigned int h;
@@ -2046,7 +1950,7 @@ static void fib_notify(struct net *net, struct notifier_block *nb,
struct fib_table *tb;
hlist_for_each_entry_rcu(tb, head, tb_hlist)
- fib_table_notify(net, tb, nb, event_type);
+ fib_table_notify(net, tb, nb);
}
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fc310db2708b..43318b5f5647 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -464,22 +464,6 @@ out_bh_enable:
local_bh_enable();
}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
-/* Source and destination is swapped. See ip_multipath_icmp_hash */
-static int icmp_multipath_hash_skb(const struct sk_buff *skb)
-{
- const struct iphdr *iph = ip_hdr(skb);
-
- return fib_multipath_hash(iph->daddr, iph->saddr);
-}
-
-#else
-
-#define icmp_multipath_hash_skb(skb) (-1)
-
-#endif
-
static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
@@ -505,8 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
- rt = __ip_route_output_key_hash(net, fl4,
- icmp_multipath_hash_skb(skb_in));
+ rt = __ip_route_output_key_hash(net, fl4, skb_in);
if (IS_ERR(rt))
return rt;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c9c1cb635d9a..e90c80a548ad 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -829,7 +829,8 @@ out:
static int ipgre_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm *parms,
+ __u32 *fwmark)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -886,6 +887,9 @@ static int ipgre_netlink_parms(struct net_device *dev,
t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
}
+ if (data[IFLA_GRE_FWMARK])
+ *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
+
return 0;
}
@@ -957,6 +961,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ __u32 fwmark = 0;
int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
@@ -967,31 +972,32 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
return err;
}
- err = ipgre_netlink_parms(dev, data, tb, &p);
+ err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
return err;
- return ip_tunnel_newlink(dev, tb, &p);
+ return ip_tunnel_newlink(dev, tb, &p, fwmark);
}
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
+ struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ __u32 fwmark = t->fwmark;
int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
- struct ip_tunnel *t = netdev_priv(dev);
err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
- err = ipgre_netlink_parms(dev, data, tb, &p);
+ err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
return err;
- return ip_tunnel_changelink(dev, tb, &p);
+ return ip_tunnel_changelink(dev, tb, &p, fwmark);
}
static size_t ipgre_get_size(const struct net_device *dev)
@@ -1029,6 +1035,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(0) +
/* IFLA_GRE_IGNORE_DF */
nla_total_size(1) +
+ /* IFLA_GRE_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -1049,7 +1057,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
nla_put_u8(skb, IFLA_GRE_PMTUDISC,
- !!(p->iph.frag_off & htons(IP_DF))))
+ !!(p->iph.frag_off & htons(IP_DF))) ||
+ nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -1093,6 +1102,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
+ [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d6feabb03516..fa2dc8f692c6 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -313,6 +313,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct net_device *dev = skb->dev;
+ void (*edemux)(struct sk_buff *skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -329,8 +330,8 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
int protocol = iph->protocol;
ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && ipprot->early_demux) {
- ipprot->early_demux(skb);
+ if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
+ edemux(skb);
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 823abaef006b..b878ecbc0608 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -293,7 +293,8 @@ failed:
static inline void init_tunnel_flow(struct flowi4 *fl4,
int proto,
__be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif)
+ __be32 key, __u8 tos, int oif,
+ __u32 mark)
{
memset(fl4, 0, sizeof(*fl4));
fl4->flowi4_oif = oif;
@@ -302,6 +303,7 @@ static inline void init_tunnel_flow(struct flowi4 *fl4,
fl4->flowi4_tos = tos;
fl4->flowi4_proto = proto;
fl4->fl4_gre_key = key;
+ fl4->flowi4_mark = mark;
}
static int ip_tunnel_bind_dev(struct net_device *dev)
@@ -322,7 +324,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link);
+ RT_TOS(iph->tos), tunnel->parms.link,
+ tunnel->fwmark);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -578,7 +581,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
- RT_TOS(tos), tunnel->parms.link);
+ RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -707,7 +710,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->fwmark);
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
@@ -795,7 +799,8 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
struct ip_tunnel *t,
struct net_device *dev,
struct ip_tunnel_parm *p,
- bool set_mtu)
+ bool set_mtu,
+ __u32 fwmark)
{
ip_tunnel_del(itn, t);
t->parms.iph.saddr = p->iph.saddr;
@@ -812,10 +817,11 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
t->parms.iph.tos = p->iph.tos;
t->parms.iph.frag_off = p->iph.frag_off;
- if (t->parms.link != p->link) {
+ if (t->parms.link != p->link || t->fwmark != fwmark) {
int mtu;
t->parms.link = p->link;
+ t->fwmark = fwmark;
mtu = ip_tunnel_bind_dev(dev);
if (set_mtu)
dev->mtu = mtu;
@@ -893,7 +899,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
if (t) {
err = 0;
- ip_tunnel_update(itn, t, dev, p, true);
+ ip_tunnel_update(itn, t, dev, p, true, 0);
} else {
err = -ENOENT;
}
@@ -1066,7 +1072,7 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p)
+ struct ip_tunnel_parm *p, __u32 fwmark)
{
struct ip_tunnel *nt;
struct net *net = dev_net(dev);
@@ -1087,6 +1093,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
nt->net = net;
nt->parms = *p;
+ nt->fwmark = fwmark;
err = register_netdevice(dev);
if (err)
goto out;
@@ -1105,7 +1112,7 @@ out:
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p)
+ struct ip_tunnel_parm *p, __u32 fwmark)
{
struct ip_tunnel *t;
struct ip_tunnel *tunnel = netdev_priv(dev);
@@ -1137,7 +1144,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
}
}
- ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
+ ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index a31f47ccaad9..baf196eaf1d8 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -235,7 +235,7 @@ static int ip_tun_build_state(struct nlattr *attr,
struct nlattr *tb[LWTUNNEL_IP_MAX + 1];
int err;
- err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy);
+ err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, NULL);
if (err < 0)
return err;
@@ -332,7 +332,8 @@ static int ip6_tun_build_state(struct nlattr *attr,
struct nlattr *tb[LWTUNNEL_IP6_MAX + 1];
int err;
- err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy);
+ err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy,
+ NULL);
if (err < 0)
return err;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 8b14f1404c8f..40977413fd48 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -471,7 +471,8 @@ static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
}
static void vti_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm *parms,
+ __u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -497,24 +498,29 @@ static void vti_netlink_parms(struct nlattr *data[],
if (data[IFLA_VTI_REMOTE])
parms->iph.daddr = nla_get_in_addr(data[IFLA_VTI_REMOTE]);
+ if (data[IFLA_VTI_FWMARK])
+ *fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]);
}
static int vti_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct ip_tunnel_parm parms;
+ __u32 fwmark = 0;
- vti_netlink_parms(data, &parms);
- return ip_tunnel_newlink(dev, tb, &parms);
+ vti_netlink_parms(data, &parms, &fwmark);
+ return ip_tunnel_newlink(dev, tb, &parms, fwmark);
}
static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
+ struct ip_tunnel *t = netdev_priv(dev);
+ __u32 fwmark = t->fwmark;
struct ip_tunnel_parm p;
- vti_netlink_parms(data, &p);
- return ip_tunnel_changelink(dev, tb, &p);
+ vti_netlink_parms(data, &p, &fwmark);
+ return ip_tunnel_changelink(dev, tb, &p, fwmark);
}
static size_t vti_get_size(const struct net_device *dev)
@@ -530,6 +536,8 @@ static size_t vti_get_size(const struct net_device *dev)
nla_total_size(4) +
/* IFLA_VTI_REMOTE */
nla_total_size(4) +
+ /* IFLA_VTI_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -543,6 +551,7 @@ static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr);
nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr);
+ nla_put_u32(skb, IFLA_VTI_FWMARK, t->fwmark);
return 0;
}
@@ -553,6 +562,7 @@ static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
[IFLA_VTI_OKEY] = { .type = NLA_U32 },
[IFLA_VTI_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
[IFLA_VTI_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_VTI_FWMARK] = { .type = NLA_U32 },
};
static struct rtnl_link_ops vti_link_ops __read_mostly = {
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index dfb2ab2dd3c8..c3b12b1c7162 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -57,6 +57,7 @@
#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/arp.h>
+#include <net/dsa.h>
#include <net/ip.h>
#include <net/ipconfig.h>
#include <net/route.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 00d4229b6954..1e441c6f2160 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -390,7 +390,8 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
}
static void ipip_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms, bool *collect_md)
+ struct ip_tunnel_parm *parms, bool *collect_md,
+ __u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -428,6 +429,9 @@ static void ipip_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_COLLECT_METADATA])
*collect_md = true;
+
+ if (data[IFLA_IPTUN_FWMARK])
+ *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}
/* This function returns true when ENCAP attributes are present in the nl msg */
@@ -470,6 +474,7 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ __u32 fwmark = 0;
if (ipip_netlink_encap_parms(data, &ipencap)) {
int err = ip_tunnel_encap_setup(t, &ipencap);
@@ -478,26 +483,27 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev,
return err;
}
- ipip_netlink_parms(data, &p, &t->collect_md);
- return ip_tunnel_newlink(dev, tb, &p);
+ ipip_netlink_parms(data, &p, &t->collect_md, &fwmark);
+ return ip_tunnel_newlink(dev, tb, &p, fwmark);
}
static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
+ struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
bool collect_md;
+ __u32 fwmark = t->fwmark;
if (ipip_netlink_encap_parms(data, &ipencap)) {
- struct ip_tunnel *t = netdev_priv(dev);
int err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
- ipip_netlink_parms(data, &p, &collect_md);
+ ipip_netlink_parms(data, &p, &collect_md, &fwmark);
if (collect_md)
return -EINVAL;
@@ -505,7 +511,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
(!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
return -EINVAL;
- return ip_tunnel_changelink(dev, tb, &p);
+ return ip_tunnel_changelink(dev, tb, &p, fwmark);
}
static size_t ipip_get_size(const struct net_device *dev)
@@ -535,6 +541,8 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_COLLECT_METADATA */
nla_total_size(0) +
+ /* IFLA_IPTUN_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -550,7 +558,8 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
- !!(parm->iph.frag_off & htons(IP_DF))))
+ !!(parm->iph.frag_off & htons(IP_DF))) ||
+ nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
@@ -585,6 +594,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
+ [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
};
static struct rtnl_link_ops ipip_link_ops __read_mostly = {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b036e85e093b..3a02d52ed50e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -631,7 +631,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
- inet_netconf_notify_devconf(dev_net(dev),
+ inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in_dev->cnf);
ip_rt_multicast_event(in_dev);
@@ -820,8 +820,8 @@ static int vif_add(struct net *net, struct mr_table *mrt,
return -EADDRNOTAVAIL;
}
IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
- inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex,
- &in_dev->cnf);
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
+ dev->ifindex, &in_dev->cnf);
ip_rt_multicast_event(in_dev);
/* Fill in the VIF structures */
@@ -1282,7 +1282,8 @@ static void mrtsock_destruct(struct sock *sk)
ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
- inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
RCU_INIT_POINTER(mrt->mroute_sk, NULL);
@@ -1343,7 +1344,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
if (ret == 0) {
rcu_assign_pointer(mrt->mroute_sk, sk);
IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
- inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
}
@@ -2421,7 +2423,8 @@ static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
/* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */
static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
struct mfcctl *mfcc, int *mrtsock,
- struct mr_table **mrtret)
+ struct mr_table **mrtret,
+ struct netlink_ext_ack *extack)
{
struct net_device *dev = NULL;
u32 tblid = RT_TABLE_DEFAULT;
@@ -2430,7 +2433,8 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
struct rtmsg *rtm;
int ret, rem;
- ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy);
+ ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy,
+ extack);
if (ret < 0)
goto out;
rtm = nlmsg_data(nlh);
@@ -2489,7 +2493,8 @@ out:
}
/* takes care of both newroute and delroute */
-static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
int ret, mrtsock, parent;
@@ -2498,7 +2503,7 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh)
mrtsock = 0;
tbl = NULL;
- ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl);
+ ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack);
if (ret < 0)
return ret;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 6241a81fd7f5..f17dab1dee6e 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -562,8 +562,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
- if (ret != 0)
- goto out_free;
ret = -EINVAL;
if (i != repl->num_entries)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 9b8841316e7b..038f293c2376 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -22,6 +22,7 @@
#include <linux/icmp.h>
#include <linux/if_arp.h>
#include <linux/seq_file.h>
+#include <linux/refcount.h>
#include <linux/netfilter_arp.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -40,8 +41,8 @@ MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
struct clusterip_config {
struct list_head list; /* list of all configs */
- atomic_t refcount; /* reference count */
- atomic_t entries; /* number of entries/rules
+ refcount_t refcount; /* reference count */
+ refcount_t entries; /* number of entries/rules
* referencing us */
__be32 clusterip; /* the IP address */
@@ -77,7 +78,7 @@ struct clusterip_net {
static inline void
clusterip_config_get(struct clusterip_config *c)
{
- atomic_inc(&c->refcount);
+ refcount_inc(&c->refcount);
}
@@ -89,7 +90,7 @@ static void clusterip_config_rcu_free(struct rcu_head *head)
static inline void
clusterip_config_put(struct clusterip_config *c)
{
- if (atomic_dec_and_test(&c->refcount))
+ if (refcount_dec_and_test(&c->refcount))
call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
}
@@ -103,7 +104,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
local_bh_disable();
- if (atomic_dec_and_lock(&c->entries, &cn->lock)) {
+ if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
list_del_rcu(&c->list);
spin_unlock(&cn->lock);
local_bh_enable();
@@ -149,10 +150,10 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
c = NULL;
else
#endif
- if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+ if (unlikely(!refcount_inc_not_zero(&c->refcount)))
c = NULL;
else if (entry)
- atomic_inc(&c->entries);
+ refcount_inc(&c->entries);
}
rcu_read_unlock_bh();
@@ -188,8 +189,8 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
clusterip_config_init_nodelist(c, i);
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
- atomic_set(&c->refcount, 1);
- atomic_set(&c->entries, 1);
+ refcount_set(&c->refcount, 1);
+ refcount_set(&c->entries, 1);
spin_lock_bh(&cn->lock);
if (__clusterip_config_find(net, ip)) {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 53e49f5011d3..da04b9c33ef3 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -998,18 +998,6 @@ err_id_free:
*
*****************************************************************************/
-static void hex_dump(const unsigned char *buf, size_t len)
-{
- size_t i;
-
- for (i = 0; i < len; i++) {
- if (i && !(i % 16))
- printk("\n");
- printk("%02x ", *(buf + i));
- }
- printk("\n");
-}
-
/*
* Parse and mangle SNMP message according to mapping.
* (And this is the fucking 'basic' method).
@@ -1026,7 +1014,8 @@ static int snmp_parse_mangle(unsigned char *msg,
struct snmp_object *obj;
if (debug > 1)
- hex_dump(msg, len);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1,
+ msg, len, 0);
asn1_open(&ctx, msg, len);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 146d86105183..7cd8d0d918f8 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -104,7 +104,6 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
- const struct iphdr *oiph;
struct iphdr *niph;
const struct tcphdr *oth;
struct tcphdr _oth;
@@ -116,8 +115,6 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
return;
- oiph = ip_hdr(oldskb);
-
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
LL_MAX_HEADER, GFP_ATOMIC);
if (!nskb)
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 2981291910dd..f4e4462cb5bb 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -90,7 +90,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
- nft_fib_store_result(dest, priv->result, pkt,
+ nft_fib_store_result(dest, priv, pkt,
nft_in(pkt)->ifindex);
return;
}
@@ -99,7 +99,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (ipv4_is_zeronet(iph->saddr)) {
if (ipv4_is_lbcast(iph->daddr) ||
ipv4_is_local_multicast(iph->daddr)) {
- nft_fib_store_result(dest, priv->result, pkt,
+ nft_fib_store_result(dest, priv, pkt,
get_ifindex(pkt->skb->dev));
return;
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 69cf49e8356d..fa44e752a9a3 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -199,7 +199,6 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TW", LINUX_MIB_TIMEWAITED),
SNMP_MIB_ITEM("TWRecycled", LINUX_MIB_TIMEWAITRECYCLED),
SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
- SNMP_MIB_ITEM("PAWSPassive", LINUX_MIB_PAWSPASSIVEREJECTED),
SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),
@@ -282,6 +281,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
+ SNMP_MIB_ITEM("TCPFastOpenBlackhole", LINUX_MIB_TCPFASTOPENBLACKHOLE),
SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS),
SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING),
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 4b7c0ec65251..32a691b7ce2c 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,7 +28,7 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
-const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
+struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
EXPORT_SYMBOL(inet_offloads);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d9724889ff09..655d9eebe43e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1250,15 +1250,11 @@ static void set_class_tag(struct rtable *rt, u32 tag)
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
- unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
+ unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
+ unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size,
+ ip_rt_min_advmss);
- if (advmss == 0) {
- advmss = max_t(unsigned int, dst->dev->mtu - 40,
- ip_rt_min_advmss);
- if (advmss > 65535 - 40)
- advmss = 65535 - 40;
- }
- return advmss;
+ return min(advmss, IPV4_MAX_PMTU - header_size);
}
static unsigned int ipv4_mtu(const struct dst_entry *dst)
@@ -1734,45 +1730,97 @@ out:
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
/* To make ICMP packets follow the right flow, the multipath hash is
- * calculated from the inner IP addresses in reverse order.
+ * calculated from the inner IP addresses.
*/
-static int ip_multipath_icmp_hash(struct sk_buff *skb)
+static void ip_multipath_l3_keys(const struct sk_buff *skb,
+ struct flow_keys *hash_keys)
{
const struct iphdr *outer_iph = ip_hdr(skb);
- struct icmphdr _icmph;
+ const struct iphdr *inner_iph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
- const struct iphdr *inner_iph;
+ struct icmphdr _icmph;
+
+ hash_keys->addrs.v4addrs.src = outer_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
+ if (likely(outer_iph->protocol != IPPROTO_ICMP))
+ return;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
- goto standard_hash;
+ return;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
- goto standard_hash;
+ return;
if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
- icmph->type != ICMP_PARAMETERPROB) {
- goto standard_hash;
- }
+ icmph->type != ICMP_PARAMETERPROB)
+ return;
inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
- goto standard_hash;
+ return;
+ hash_keys->addrs.v4addrs.src = inner_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+}
- return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);
+/* if skb is set it will be used and fl4 can be NULL */
+int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
+ const struct sk_buff *skb)
+{
+ struct net *net = fi->fib_net;
+ struct flow_keys hash_keys;
+ u32 mhash;
-standard_hash:
- return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
-}
+ switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ case 0:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ if (skb) {
+ ip_multipath_l3_keys(skb, &hash_keys);
+ } else {
+ hash_keys.addrs.v4addrs.src = fl4->saddr;
+ hash_keys.addrs.v4addrs.dst = fl4->daddr;
+ }
+ break;
+ case 1:
+ /* skb is currently provided only when forwarding */
+ if (skb) {
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+ struct flow_keys keys;
+
+ /* short-circuit if we already have L4 hash present */
+ if (skb->l4_hash)
+ return skb_get_hash_raw(skb) >> 1;
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+ hash_keys.ports.src = keys.ports.src;
+ hash_keys.ports.dst = keys.ports.dst;
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ } else {
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ hash_keys.addrs.v4addrs.src = fl4->saddr;
+ hash_keys.addrs.v4addrs.dst = fl4->daddr;
+ hash_keys.ports.src = fl4->fl4_sport;
+ hash_keys.ports.dst = fl4->fl4_dport;
+ hash_keys.basic.ip_proto = fl4->flowi4_proto;
+ }
+ break;
+ }
+ mhash = flow_hash_from_keys(&hash_keys);
+ return mhash >> 1;
+}
+EXPORT_SYMBOL_GPL(fib_multipath_hash);
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
static int ip_mkroute_input(struct sk_buff *skb,
@@ -1782,12 +1830,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h;
+ int h = fib_multipath_hash(res->fi, NULL, skb);
- if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
- h = ip_multipath_icmp_hash(skb);
- else
- h = fib_multipath_hash(saddr, daddr);
fib_select_multipath(res, h);
}
#endif
@@ -2203,7 +2247,7 @@ add:
*/
struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
- int mp_hash)
+ const struct sk_buff *skb)
{
struct net_device *dev_out = NULL;
__u8 tos = RT_FL_TOS(fl4);
@@ -2366,7 +2410,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
goto make_route;
}
- fib_select_path(net, &res, fl4, mp_hash);
+ fib_select_path(net, &res, fl4, skb);
dev_out = FIB_RES_DEV(res);
fl4->flowi4_oif = dev_out->ifindex;
@@ -2586,7 +2630,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct rtmsg *rtm;
@@ -2602,7 +2647,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
u32 table_id = RT_TABLE_MAIN;
kuid_t uid;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy,
+ extack);
if (err < 0)
goto errout;
@@ -2620,10 +2666,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
- /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
- ip_hdr(skb)->protocol = IPPROTO_UDP;
- skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
-
src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
@@ -2633,6 +2675,15 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
else
uid = (iif ? INVALID_UID : current_uid());
+ /* Bugfix: need to give ip_route_input enough of an IP header to
+ * not gag.
+ */
+ ip_hdr(skb)->protocol = IPPROTO_UDP;
+ ip_hdr(skb)->saddr = src;
+ ip_hdr(skb)->daddr = dst;
+
+ skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
+
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dst;
fl4.saddr = src;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d6880a6149ee..86957e9cd6c6 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -24,6 +24,7 @@
#include <net/cipso_ipv4.h>
#include <net/inet_frag.h>
#include <net/ping.h>
+#include <net/protocol.h>
static int zero;
static int one = 1;
@@ -294,6 +295,74 @@ bad_key:
return ret;
}
+static void proc_configure_early_demux(int enabled, int protocol)
+{
+ struct net_protocol *ipprot;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_protocol *ip6prot;
+#endif
+
+ rcu_read_lock();
+
+ ipprot = rcu_dereference(inet_protos[protocol]);
+ if (ipprot)
+ ipprot->early_demux = enabled ? ipprot->early_demux_handler :
+ NULL;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ ip6prot = rcu_dereference(inet6_protos[protocol]);
+ if (ip6prot)
+ ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
+ NULL;
+#endif
+ rcu_read_unlock();
+}
+
+static int proc_tcp_early_demux(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret = 0;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (write && !ret) {
+ int enabled = init_net.ipv4.sysctl_tcp_early_demux;
+
+ proc_configure_early_demux(enabled, IPPROTO_TCP);
+ }
+
+ return ret;
+}
+
+static int proc_udp_early_demux(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret = 0;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (write && !ret) {
+ int enabled = init_net.ipv4.sysctl_udp_early_demux;
+
+ proc_configure_early_demux(enabled, IPPROTO_UDP);
+ }
+
+ return ret;
+}
+
+static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
+ int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ tcp_fastopen_active_timeout_reset();
+ return ret;
+}
+
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
@@ -344,6 +413,14 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_tcp_fastopen_key,
},
{
+ .procname = "tcp_fastopen_blackhole_timeout_sec",
+ .data = &sysctl_tcp_fastopen_blackhole_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tfo_blackhole_detect_timeout,
+ .extra1 = &zero,
+ },
+ {
.procname = "tcp_abort_on_overflow",
.data = &sysctl_tcp_abort_on_overflow,
.maxlen = sizeof(int),
@@ -750,6 +827,20 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "udp_early_demux",
+ .data = &init_net.ipv4.sysctl_udp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_udp_early_demux
+ },
+ {
+ .procname = "tcp_early_demux",
+ .data = &init_net.ipv4.sysctl_tcp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tcp_early_demux
+ },
+ {
.procname = "ip_default_ttl",
.data = &init_net.ipv4.sysctl_ip_default_ttl,
.maxlen = sizeof(int),
@@ -981,13 +1072,6 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_tw_recycle",
- .data = &init_net.ipv4.tcp_death_row.sysctl_tw_recycle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_max_syn_backlog",
.data = &init_net.ipv4.sysctl_max_syn_backlog,
.maxlen = sizeof(int),
@@ -1004,6 +1088,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = &zero,
.extra2 = &one,
},
+ {
+ .procname = "fib_multipath_hash_policy",
+ .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
#endif
{
.procname = "ip_unprivileged_port_start",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 40ba4249a586..059dad7deefe 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2296,6 +2296,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
tcp_write_queue_purge(sk);
+ tcp_fastopen_active_disable_ofo_check(sk);
skb_rbtree_purge(&tp->out_of_order_queue);
inet->inet_dport = 0;
@@ -2394,7 +2395,7 @@ static int tcp_repair_options_est(struct tcp_sock *tp,
u16 snd_wscale = opt.opt_val & 0xFFFF;
u16 rcv_wscale = opt.opt_val >> 16;
- if (snd_wscale > 14 || rcv_wscale > 14)
+ if (snd_wscale > TCP_MAX_WSCALE || rcv_wscale > TCP_MAX_WSCALE)
return -EFBIG;
tp->rx_opt.snd_wscale = snd_wscale;
@@ -2471,7 +2472,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Values greater than interface MTU won't take effect. However
* at the point when this call is done we typically don't yet
* know which interface is going to be used */
- if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
+ if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
err = -EINVAL;
break;
}
@@ -2852,7 +2853,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_snd_ssthresh = tp->snd_ssthresh;
info->tcpi_advmss = tp->advmss;
- info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
+ info->tcpi_rcv_rtt = tp->rcv_rtt_est.rtt_us >> 3;
info->tcpi_rcv_space = tp->rcvq_space.space;
info->tcpi_total_retrans = tp->total_retrans;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index c99230efcd52..0683ba447d77 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -72,7 +72,7 @@ MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
module_param(hystart, int, 0644);
MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");
module_param(hystart_detect, int, 0644);
-MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms"
+MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms"
" 1: packet-train 2: delay 3: both packet-train and delay");
module_param(hystart_low_window, int, 0644);
MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 8ea4e9787f82..4af82b914dd4 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -341,6 +341,13 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
cookie->len = -1;
return false;
}
+
+ /* Firewall blackhole issue check */
+ if (tcp_fastopen_active_should_disable(sk)) {
+ cookie->len = -1;
+ return false;
+ }
+
if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
cookie->len = -1;
return true;
@@ -380,3 +387,98 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
return false;
}
EXPORT_SYMBOL(tcp_fastopen_defer_connect);
+
+/*
+ * The following code block is to deal with middle box issues with TFO:
+ * Middlebox firewall issues can potentially cause server's data being
+ * blackholed after a successful 3WHS using TFO.
+ * The proposed solution is to disable active TFO globally under the
+ * following circumstances:
+ * 1. client side TFO socket receives out of order FIN
+ * 2. client side TFO socket receives out of order RST
+ * We disable active side TFO globally for 1hr at first. Then if it
+ * happens again, we disable it for 2h, then 4h, 8h, ...
+ * And we reset the timeout back to 1hr when we see a successful active
+ * TFO connection with data exchanges.
+ */
+
+/* Default to 1hr */
+unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
+static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
+static unsigned long tfo_active_disable_stamp __read_mostly;
+
+/* Disable active TFO and record current jiffies and
+ * tfo_active_disable_times
+ */
+void tcp_fastopen_active_disable(struct sock *sk)
+{
+ atomic_inc(&tfo_active_disable_times);
+ tfo_active_disable_stamp = jiffies;
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE);
+}
+
+/* Reset tfo_active_disable_times to 0 */
+void tcp_fastopen_active_timeout_reset(void)
+{
+ atomic_set(&tfo_active_disable_times, 0);
+}
+
+/* Calculate timeout for tfo active disable
+ * Return true if we are still in the active TFO disable period
+ * Return false if timeout already expired and we should use active TFO
+ */
+bool tcp_fastopen_active_should_disable(struct sock *sk)
+{
+ int tfo_da_times = atomic_read(&tfo_active_disable_times);
+ int multiplier;
+ unsigned long timeout;
+
+ if (!tfo_da_times)
+ return false;
+
+ /* Limit timout to max: 2^6 * initial timeout */
+ multiplier = 1 << min(tfo_da_times - 1, 6);
+ timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ;
+ if (time_before(jiffies, tfo_active_disable_stamp + timeout))
+ return true;
+
+ /* Mark check bit so we can check for successful active TFO
+ * condition and reset tfo_active_disable_times
+ */
+ tcp_sk(sk)->syn_fastopen_ch = 1;
+ return false;
+}
+
+/* Disable active TFO if FIN is the only packet in the ofo queue
+ * and no data is received.
+ * Also check if we can reset tfo_active_disable_times if data is
+ * received successfully on a marked active TFO sockets opened on
+ * a non-loopback interface
+ */
+void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct rb_node *p;
+ struct sk_buff *skb;
+ struct dst_entry *dst;
+
+ if (!tp->syn_fastopen)
+ return;
+
+ if (!tp->data_segs_in) {
+ p = rb_first(&tp->out_of_order_queue);
+ if (p && !rb_next(p)) {
+ skb = rb_entry(p, struct sk_buff, rbnode);
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
+ tcp_fastopen_active_disable(sk);
+ return;
+ }
+ }
+ } else if (tp->syn_fastopen_ch &&
+ atomic_read(&tfo_active_disable_times)) {
+ dst = sk_dst_get(sk);
+ if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
+ tcp_fastopen_active_timeout_reset();
+ dst_release(dst);
+ }
+}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 659d1baefb2b..9739962bfb3f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -442,7 +442,8 @@ void tcp_init_buffer_space(struct sock *sk)
tcp_sndbuf_expand(sk);
tp->rcvq_space.space = tp->rcv_wnd;
- tp->rcvq_space.time = tcp_time_stamp;
+ skb_mstamp_get(&tp->tcp_mstamp);
+ tp->rcvq_space.time = tp->tcp_mstamp;
tp->rcvq_space.seq = tp->copied_seq;
maxwin = tcp_full_space(sk);
@@ -518,7 +519,7 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss);
*/
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
- u32 new_sample = tp->rcv_rtt_est.rtt;
+ u32 new_sample = tp->rcv_rtt_est.rtt_us;
long m = sample;
if (m == 0)
@@ -548,21 +549,23 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
new_sample = m << 3;
}
- if (tp->rcv_rtt_est.rtt != new_sample)
- tp->rcv_rtt_est.rtt = new_sample;
+ tp->rcv_rtt_est.rtt_us = new_sample;
}
static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
- if (tp->rcv_rtt_est.time == 0)
+ u32 delta_us;
+
+ if (tp->rcv_rtt_est.time.v64 == 0)
goto new_measure;
if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
return;
- tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
+ delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time);
+ tcp_rcv_rtt_update(tp, delta_us, 1);
new_measure:
tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
- tp->rcv_rtt_est.time = tcp_time_stamp;
+ tp->rcv_rtt_est.time = tp->tcp_mstamp;
}
static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
@@ -572,7 +575,10 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
if (tp->rx_opt.rcv_tsecr &&
(TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
- tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
+ tcp_rcv_rtt_update(tp,
+ jiffies_to_usecs(tcp_time_stamp -
+ tp->rx_opt.rcv_tsecr),
+ 0);
}
/*
@@ -585,8 +591,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
int time;
int copied;
- time = tcp_time_stamp - tp->rcvq_space.time;
- if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
+ time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time);
+ if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
return;
/* Number of bytes copied to user in last RTT */
@@ -642,7 +648,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
new_measure:
tp->rcvq_space.seq = tp->copied_seq;
- tp->rcvq_space.time = tcp_time_stamp;
+ tp->rcvq_space.time = tp->tcp_mstamp;
}
/* There is something which you must keep in mind when you analyze the
@@ -1131,7 +1137,6 @@ struct tcp_sacktag_state {
*/
struct skb_mstamp first_sackt;
struct skb_mstamp last_sackt;
- struct skb_mstamp ack_time; /* Timestamp when the S/ACK was received */
struct rate_sample *rate;
int flag;
};
@@ -1214,8 +1219,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
return sacked;
if (!(sacked & TCPCB_SACKED_ACKED)) {
- tcp_rack_advance(tp, sacked, end_seq,
- xmit_time, &state->ack_time);
+ tcp_rack_advance(tp, sacked, end_seq, xmit_time);
if (sacked & TCPCB_SACKED_RETRANS) {
/* If the segment is not tagged as lost,
@@ -2760,8 +2764,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked)
return false;
}
-static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag,
- const struct skb_mstamp *ack_time)
+static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2769,7 +2772,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag,
if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
u32 prior_retrans = tp->retrans_out;
- tcp_rack_mark_lost(sk, ack_time);
+ tcp_rack_mark_lost(sk);
if (prior_retrans > tp->retrans_out)
*ack_flag |= FLAG_LOST_RETRANS;
}
@@ -2788,8 +2791,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag,
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, const int acked,
- bool is_dupack, int *ack_flag, int *rexmit,
- const struct skb_mstamp *ack_time)
+ bool is_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -2857,11 +2859,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
tcp_try_keep_open(sk);
return;
}
- tcp_rack_identify_loss(sk, ack_flag, ack_time);
+ tcp_rack_identify_loss(sk, ack_flag);
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, is_dupack, rexmit);
- tcp_rack_identify_loss(sk, ack_flag, ack_time);
+ tcp_rack_identify_loss(sk, ack_flag);
if (!(icsk->icsk_ca_state == TCP_CA_Open ||
(*ack_flag & FLAG_LOST_RETRANS)))
return;
@@ -2877,7 +2879,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
- tcp_rack_identify_loss(sk, ack_flag, ack_time);
+ tcp_rack_identify_loss(sk, ack_flag);
if (!tcp_time_to_recover(sk, flag)) {
tcp_try_to_open(sk, flag);
return;
@@ -3059,8 +3061,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct skb_mstamp first_ackt, last_ackt;
- struct skb_mstamp *now = &sack->ack_time;
struct tcp_sock *tp = tcp_sk(sk);
+ struct skb_mstamp *now = &tp->tcp_mstamp;
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
bool fully_acked = true;
@@ -3120,8 +3122,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->delivered += acked_pcount;
if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, sacked, scb->end_seq,
- &skb->skb_mstamp,
- &sack->ack_time);
+ &skb->skb_mstamp);
}
if (sacked & TCPCB_LOST)
tp->lost_out -= acked_pcount;
@@ -3576,8 +3577,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (after(ack, tp->snd_nxt))
goto invalid_ack;
- skb_mstamp_get(&sack_state.ack_time);
-
if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
@@ -3647,8 +3646,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
- &sack_state.ack_time);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
}
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -3660,8 +3658,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_schedule_loss_probe(sk);
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */
- tcp_rate_gen(sk, delivered, lost, &sack_state.ack_time,
- sack_state.rate);
+ tcp_rate_gen(sk, delivered, lost, sack_state.rate);
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
tcp_xmit_recovery(sk, rexmit);
return 1;
@@ -3669,8 +3666,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
- &sack_state.ack_time);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
@@ -3691,11 +3687,9 @@ old_ack:
* If data was DSACKed, see if we can undo a cwnd reduction.
*/
if (TCP_SKB_CB(skb)->sacked) {
- skb_mstamp_get(&sack_state.ack_time);
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
- &sack_state.ack_time);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
tcp_xmit_recovery(sk, rexmit);
}
@@ -3768,11 +3762,12 @@ void tcp_parse_options(const struct sk_buff *skb,
!estab && sysctl_tcp_window_scaling) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
- if (snd_wscale > 14) {
- net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
+ if (snd_wscale > TCP_MAX_WSCALE) {
+ net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
__func__,
- snd_wscale);
- snd_wscale = 14;
+ snd_wscale,
+ TCP_MAX_WSCALE);
+ snd_wscale = TCP_MAX_WSCALE;
}
opt_rx->snd_wscale = snd_wscale;
}
@@ -4007,10 +4002,10 @@ void tcp_reset(struct sock *sk)
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
+ tcp_done(sk);
+
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_error_report(sk);
-
- tcp_done(sk);
}
/*
@@ -5299,8 +5294,16 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
if (rst_seq_match)
tcp_reset(sk);
- else
+ else {
+ /* Disable TFO if RST is out-of-order
+ * and no data has been received
+ * for current active TFO socket
+ */
+ if (tp->syn_fastopen && !tp->data_segs_in &&
+ sk->sk_state == TCP_ESTABLISHED)
+ tcp_fastopen_active_disable(sk);
tcp_send_challenge_ack(sk, skb);
+ }
goto discard;
}
@@ -5353,6 +5356,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
+ skb_mstamp_get(&tp->tcp_mstamp);
if (unlikely(!sk->sk_rx_dst))
inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
@@ -5579,10 +5583,6 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
else
tp->pred_flags = 0;
- if (!sock_flag(sk, SOCK_DEAD)) {
- sk->sk_state_change(sk);
- sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
- }
}
static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
@@ -5651,6 +5651,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_cookie foc = { .len = -1 };
int saved_clamp = tp->rx_opt.mss_clamp;
+ bool fastopen_fail;
tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
@@ -5754,10 +5755,15 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_finish_connect(sk, skb);
- if ((tp->syn_fastopen || tp->syn_data) &&
- tcp_rcv_fastopen_synack(sk, skb, &foc))
- return -1;
+ fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
+ tcp_rcv_fastopen_synack(sk, skb, &foc);
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ sk->sk_state_change(sk);
+ sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
+ }
+ if (fastopen_fail)
+ return -1;
if (sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept ||
icsk->icsk_ack.pingpong) {
@@ -5911,6 +5917,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_SYN_SENT:
tp->rx_opt.saw_tstamp = 0;
+ skb_mstamp_get(&tp->tcp_mstamp);
queued = tcp_rcv_synsent_state_process(sk, skb, th);
if (queued >= 0)
return queued;
@@ -5922,6 +5929,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
return 0;
}
+ skb_mstamp_get(&tp->tcp_mstamp);
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
@@ -6041,9 +6049,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
break;
}
- if (tp->linger2 < 0 ||
- (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
- after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
+ if (tp->linger2 < 0) {
+ tcp_done(sk);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+ return 1;
+ }
+ if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+ after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
+ /* Receive out of order FIN after close() */
+ if (tp->syn_fastopen && th->fin)
+ tcp_fastopen_active_disable(sk);
tcp_done(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
return 1;
@@ -6333,36 +6348,14 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_free;
if (isn && tmp_opt.tstamp_ok)
- af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+ af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
if (!want_cookie && !isn) {
- /* VJ's idea. We save last timestamp seen
- * from the destination in peer table, when entering
- * state TIME-WAIT, and check against it before
- * accepting new connection request.
- *
- * If "isn" is not zero, this request hit alive
- * timewait bucket, so that all the necessary checks
- * are made in the function processing timewait state.
- */
- if (net->ipv4.tcp_death_row.sysctl_tw_recycle) {
- bool strict;
-
- dst = af_ops->route_req(sk, &fl, req, &strict);
-
- if (dst && strict &&
- !tcp_peer_is_proven(req, dst, true,
- tmp_opt.saw_tstamp)) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
- goto drop_and_release;
- }
- }
/* Kill the following clause, if you dislike this way. */
- else if (!net->ipv4.sysctl_tcp_syncookies &&
- (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
- !tcp_peer_is_proven(req, dst, false,
- tmp_opt.saw_tstamp)) {
+ if (!net->ipv4.sysctl_tcp_syncookies &&
+ (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+ !tcp_peer_is_proven(req, dst)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
* proven to be alive.
@@ -6375,10 +6368,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_release;
}
- isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+ isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
}
if (!dst) {
- dst = af_ops->route_req(sk, &fl, req, NULL);
+ dst = af_ops->route_req(sk, &fl, req);
if (!dst)
goto drop_and_free;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 575e19dcc017..cbbafe546c0f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -94,12 +94,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
-static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff)
+static u32 tcp_v4_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff)
{
- return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr,
- tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source, tsoff);
+ return secure_tcp_seq_and_tsoff(ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source, tsoff);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -198,10 +198,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tp->write_seq = 0;
}
- if (tcp_death_row->sysctl_tw_recycle &&
- !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
- tcp_fetch_timewait_stamp(sk, &rt->dst);
-
inet->inet_dport = usin->sin_port;
sk_daddr_set(sk, daddr);
@@ -236,11 +232,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
rt = NULL;
if (likely(!tp->repair)) {
- seq = secure_tcp_sequence_number(inet->inet_saddr,
- inet->inet_daddr,
- inet->inet_sport,
- usin->sin_port,
- &tp->tsoffset);
+ seq = secure_tcp_seq_and_tsoff(inet->inet_saddr,
+ inet->inet_daddr,
+ inet->inet_sport,
+ usin->sin_port,
+ &tp->tsoffset);
if (!tp->write_seq)
tp->write_seq = seq;
}
@@ -1217,19 +1213,9 @@ static void tcp_v4_init_req(struct request_sock *req,
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
struct flowi *fl,
- const struct request_sock *req,
- bool *strict)
+ const struct request_sock *req)
{
- struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
-
- if (strict) {
- if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
- *strict = true;
- else
- *strict = false;
- }
-
- return dst;
+ return inet_csk_route_req(sk, &fl->u.ip4, req);
}
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
@@ -1253,7 +1239,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.cookie_init_seq = cookie_v4_init_sequence,
#endif
.route_req = tcp_v4_route_req,
- .init_seq = tcp_v4_init_sequence,
+ .init_seq_tsoff = tcp_v4_init_seq_and_tsoff,
.send_synack = tcp_v4_send_synack,
};
@@ -1423,8 +1409,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
if (!nsk)
goto discard;
if (nsk != sk) {
- sock_rps_save_rxhash(nsk, skb);
- sk_mark_napi_id(nsk, skb);
if (tcp_child_process(sk, nsk, skb)) {
rsk = nsk;
goto reset;
@@ -1871,6 +1855,9 @@ void tcp_v4_destroy_sock(struct sock *sk)
/* Cleanup up the write buffer. */
tcp_write_queue_purge(sk);
+ /* Check if we want to disable active TFO */
+ tcp_fastopen_active_disable_ofo_check(sk);
+
/* Cleans up our, hopefully empty, out_of_order_queue. */
skb_rbtree_purge(&tp->out_of_order_queue);
@@ -2466,7 +2453,6 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_tw_reuse = 0;
cnt = tcp_hashinfo.ehash_mask + 1;
- net->ipv4.tcp_death_row.sysctl_tw_recycle = 0;
net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0f46e5fe31ad..9d0d4f39e42b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -45,8 +45,6 @@ struct tcp_metrics_block {
struct inetpeer_addr tcpm_saddr;
struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp;
- u32 tcpm_ts;
- u32 tcpm_ts_stamp;
u32 tcpm_lock;
u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
struct tcp_fastopen_metrics tcpm_fastopen;
@@ -123,8 +121,6 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
- tm->tcpm_ts = 0;
- tm->tcpm_ts_stamp = 0;
if (fastopen_clear) {
tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0;
@@ -273,48 +269,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
return tm;
}
-static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
-{
- struct tcp_metrics_block *tm;
- struct inetpeer_addr saddr, daddr;
- unsigned int hash;
- struct net *net;
-
- if (tw->tw_family == AF_INET) {
- inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
- inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
- hash = ipv4_addr_hash(tw->tw_daddr);
- }
-#if IS_ENABLED(CONFIG_IPV6)
- else if (tw->tw_family == AF_INET6) {
- if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) {
- inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
- inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
- hash = ipv4_addr_hash(tw->tw_daddr);
- } else {
- inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr);
- inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr);
- hash = ipv6_addr_hash(&tw->tw_v6_daddr);
- }
- }
-#endif
- else
- return NULL;
-
- net = twsk_net(tw);
- hash ^= net_hash_mix(net);
- hash = hash_32(hash, tcp_metrics_hash_log);
-
- for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
- tm = rcu_dereference(tm->tcpm_next)) {
- if (addr_same(&tm->tcpm_saddr, &saddr) &&
- addr_same(&tm->tcpm_daddr, &daddr) &&
- net_eq(tm_net(tm), net))
- break;
- }
- return tm;
-}
-
static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
struct dst_entry *dst,
bool create)
@@ -573,8 +527,7 @@ reset:
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
- bool paws_check, bool timestamps)
+bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
{
struct tcp_metrics_block *tm;
bool ret;
@@ -584,94 +537,10 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
rcu_read_lock();
tm = __tcp_get_metrics_req(req, dst);
- if (paws_check) {
- if (tm &&
- (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
- ((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW ||
- !timestamps))
- ret = false;
- else
- ret = true;
- } else {
- if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
- ret = true;
- else
- ret = false;
- }
- rcu_read_unlock();
-
- return ret;
-}
-
-void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
-{
- struct tcp_metrics_block *tm;
-
- rcu_read_lock();
- tm = tcp_get_metrics(sk, dst, true);
- if (tm) {
- struct tcp_sock *tp = tcp_sk(sk);
-
- if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) {
- tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
- tp->rx_opt.ts_recent = tm->tcpm_ts;
- }
- }
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);
-
-/* VJ's idea. Save last timestamp seen from this destination and hold
- * it at least for normal timewait interval to use for duplicate
- * segment detection in subsequent connections, before they enter
- * synchronized state.
- */
-bool tcp_remember_stamp(struct sock *sk)
-{
- struct dst_entry *dst = __sk_dst_get(sk);
- bool ret = false;
-
- if (dst) {
- struct tcp_metrics_block *tm;
-
- rcu_read_lock();
- tm = tcp_get_metrics(sk, dst, true);
- if (tm) {
- struct tcp_sock *tp = tcp_sk(sk);
-
- if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
- ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
- tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
- tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
- tm->tcpm_ts = tp->rx_opt.ts_recent;
- }
- ret = true;
- }
- rcu_read_unlock();
- }
- return ret;
-}
-
-bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
-{
- struct tcp_metrics_block *tm;
- bool ret = false;
-
- rcu_read_lock();
- tm = __tcp_get_metrics_tw(tw);
- if (tm) {
- const struct tcp_timewait_sock *tcptw;
- struct sock *sk = (struct sock *) tw;
-
- tcptw = tcp_twsk(sk);
- if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
- ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
- tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
- tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
- tm->tcpm_ts = tcptw->tw_ts_recent;
- }
+ if (tm && tcp_metric_get(tm, TCP_METRIC_RTT))
ret = true;
- }
+ else
+ ret = false;
rcu_read_unlock();
return ret;
@@ -791,14 +660,6 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
jiffies - tm->tcpm_stamp,
TCP_METRICS_ATTR_PAD) < 0)
goto nla_put_failure;
- if (tm->tcpm_ts_stamp) {
- if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP,
- (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0)
- goto nla_put_failure;
- if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL,
- tm->tcpm_ts) < 0)
- goto nla_put_failure;
- }
{
int n = 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 65c0f3d13eca..8f6373b0cd77 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -26,6 +26,7 @@
#include <net/tcp.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
+#include <net/busy_poll.h>
int sysctl_tcp_abort_on_overflow __read_mostly;
@@ -94,7 +95,6 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
bool paws_reject = false;
- struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row;
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
@@ -149,12 +149,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
}
- if (tcp_death_row->sysctl_tw_recycle &&
- tcptw->tw_ts_recent_stamp &&
- tcp_tw_remember_stamp(tw))
- inet_twsk_reschedule(tw, tw->tw_timeout);
- else
- inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
+ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
return TCP_TW_ACK;
}
@@ -259,12 +254,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct inet_timewait_sock *tw;
- bool recycle_ok = false;
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
- if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
- recycle_ok = tcp_remember_stamp(sk);
-
tw = inet_twsk_alloc(sk, tcp_death_row, state);
if (tw) {
@@ -317,13 +308,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (timeo < rto)
timeo = rto;
- if (recycle_ok) {
- tw->tw_timeout = rto;
- } else {
- tw->tw_timeout = TCP_TIMEWAIT_LEN;
- if (state == TCP_TIME_WAIT)
- timeo = TCP_TIMEWAIT_LEN;
- }
+ tw->tw_timeout = TCP_TIMEWAIT_LEN;
+ if (state == TCP_TIME_WAIT)
+ timeo = TCP_TIMEWAIT_LEN;
inet_twsk_schedule(tw, timeo);
/* Linkage updates. */
@@ -813,6 +800,9 @@ int tcp_child_process(struct sock *parent, struct sock *child,
int ret = 0;
int state = child->sk_state;
+ /* record NAPI ID of child */
+ sk_mark_napi_id(child, skb);
+
tcp_segs_in(tcp_sk(child), skb);
if (!sock_owned_by_user(child)) {
ret = tcp_rcv_state_process(child, skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c3c082ed3879..ffc9274b2706 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -212,12 +212,12 @@ void tcp_select_initial_window(int __space, __u32 mss,
/* If no clamp set the clamp to the max possible scaled window */
if (*window_clamp == 0)
- (*window_clamp) = (65535 << 14);
+ (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
space = min(*window_clamp, space);
/* Quantize space offering to a multiple of mss if possible. */
if (space > mss)
- space = (space / mss) * mss;
+ space = rounddown(space, mss);
/* NOTE: offering an initial window larger than 32767
* will break some buggy TCP stacks. If the admin tells us
@@ -234,13 +234,11 @@ void tcp_select_initial_window(int __space, __u32 mss,
(*rcv_wscale) = 0;
if (wscale_ok) {
- /* Set window scaling on max possible window
- * See RFC1323 for an explanation of the limit to 14
- */
+ /* Set window scaling on max possible window */
space = max_t(u32, space, sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
- while (space > 65535 && (*rcv_wscale) < 14) {
+ while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
space >>= 1;
(*rcv_wscale)++;
}
@@ -253,7 +251,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
}
/* Set the clamp no higher than max representable value */
- (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
+ (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
}
EXPORT_SYMBOL(tcp_select_initial_window);
@@ -2561,7 +2559,6 @@ u32 __tcp_select_window(struct sock *sk)
/* Don't do rounding if we are using window scaling, since the
* scaled window will not line up with the MSS boundary anyway.
*/
- window = tp->rcv_wnd;
if (tp->rx_opt.rcv_wscale) {
window = free_space;
@@ -2569,10 +2566,9 @@ u32 __tcp_select_window(struct sock *sk)
* Import case: prevent zero window announcement if
* 1<<rcv_wscale > mss.
*/
- if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
- window = (((window >> tp->rx_opt.rcv_wscale) + 1)
- << tp->rx_opt.rcv_wscale);
+ window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale));
} else {
+ window = tp->rcv_wnd;
/* Get the largest window that is a nice multiple of mss.
* Window clamp already applied above.
* If our current window offering is within 1 mss of the
@@ -2582,7 +2578,7 @@ u32 __tcp_select_window(struct sock *sk)
* is too small.
*/
if (window <= free_space - mss || window > free_space)
- window = (free_space / mss) * mss;
+ window = rounddown(free_space, mss);
else if (mss == full_space &&
free_space > window + (full_space >> 1))
window = free_space;
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 9be1581a5a08..c6a9fa894646 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
/* Update the connection delivery information and generate a rate sample. */
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- struct skb_mstamp *now, struct rate_sample *rs)
+ struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 snd_us, ack_us;
@@ -120,7 +120,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
* to carry current time, flags, stats like "tcp_sacktag_state".
*/
if (delivered)
- tp->delivered_mstamp = *now;
+ tp->delivered_mstamp = tp->tcp_mstamp;
rs->acked_sacked = delivered; /* freshly ACKed or SACKed */
rs->losses = lost; /* freshly marked lost */
@@ -138,7 +138,8 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
* longer phase.
*/
snd_us = rs->interval_us; /* send phase */
- ack_us = skb_mstamp_us_delta(now, &rs->prior_mstamp); /* ack phase */
+ ack_us = skb_mstamp_us_delta(&tp->tcp_mstamp,
+ &rs->prior_mstamp); /* ack phase */
rs->interval_us = max(snd_us, ack_us);
/* Normally we expect interval_us >= min-rtt.
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index d8acbd9f477a..cd72b3d3879e 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -45,8 +45,7 @@ static bool tcp_rack_sent_after(const struct skb_mstamp *t1,
* or tcp_time_to_recover()'s "Trick#1: the loss is proven" code path will
* make us enter the CA_Recovery state.
*/
-static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now,
- u32 *reo_timeout)
+static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -79,7 +78,7 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now,
* A packet is lost if its elapsed time is beyond
* the recent RTT plus the reordering window.
*/
- u32 elapsed = skb_mstamp_us_delta(now,
+ u32 elapsed = skb_mstamp_us_delta(&tp->tcp_mstamp,
&skb->skb_mstamp);
s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed;
@@ -105,7 +104,7 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now,
}
}
-void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
+void tcp_rack_mark_lost(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout;
@@ -115,7 +114,7 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
/* Reset the advanced flag to avoid unnecessary queue scanning */
tp->rack.advanced = 0;
- tcp_rack_detect_loss(sk, now, &timeout);
+ tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
@@ -128,8 +127,7 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
* draft-cheng-tcpm-rack-00.txt
*/
void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
- const struct skb_mstamp *xmit_time,
- const struct skb_mstamp *ack_time)
+ const struct skb_mstamp *xmit_time)
{
u32 rtt_us;
@@ -138,7 +136,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
end_seq, tp->rack.end_seq))
return;
- rtt_us = skb_mstamp_us_delta(ack_time, xmit_time);
+ rtt_us = skb_mstamp_us_delta(&tp->tcp_mstamp, xmit_time);
if (sacked & TCPCB_RETRANS) {
/* If the sacked packet was retransmitted, it's ambiguous
* whether the retransmission or the original (or the prior
@@ -165,12 +163,10 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
void tcp_rack_reo_timeout(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct skb_mstamp now;
u32 timeout, prior_inflight;
- skb_mstamp_get(&now);
prior_inflight = tcp_packets_in_flight(tp);
- tcp_rack_detect_loss(sk, &now, &timeout);
+ tcp_rack_detect_loss(sk, &timeout);
if (prior_inflight != tcp_packets_in_flight(tp)) {
if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) {
tcp_enter_recovery(sk, false);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b2ab411c6d37..14672543cf0b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -201,11 +201,10 @@ static int tcp_write_timeout(struct sock *sk)
if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) {
/* Some middle-boxes may black-hole Fast Open _after_
* the handshake. Therefore we conservatively disable
- * Fast Open on this path on recurring timeouts with
- * few or zero bytes acked after Fast Open.
+ * Fast Open on this path on recurring timeouts after
+ * successful Fast Open.
*/
- if (tp->syn_data_acked &&
- tp->bytes_acked <= tp->rx_opt.mss_clamp) {
+ if (tp->syn_data_acked) {
tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1)
NET_INC_STATS(sock_net(sk),
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index fed66dc0e0f5..9775453b8d17 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -265,8 +265,8 @@ static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr,
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
info->vegas.tcpv_enabled = 1;
info->vegas.tcpv_rttcnt = 0;
- info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt),
- info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
+ info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt);
+ info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min);
*attr = INET_DIAG_VEGASINFO;
return sizeof(struct tcpvegas_info);
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index 4acc0508c5eb..3d36644890bb 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -12,6 +12,7 @@
#include <net/dst.h>
#include <net/ip.h>
#include <net/xfrm.h>
+#include <net/protocol.h>
/* Add encapsulation header.
*
@@ -23,6 +24,8 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph = ip_hdr(skb);
int ihl = iph->ihl * 4;
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
offsetof(struct iphdr, protocol);
@@ -56,9 +59,40 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
+static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ const struct net_offload *ops;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ skb->transport_header += x->props.header_len;
+ ops = rcu_dereference(inet_offloads[xo->proto]);
+ if (likely(ops && ops->callbacks.gso_segment))
+ segs = ops->callbacks.gso_segment(skb, features);
+
+ return segs;
+}
+
+static void xfrm4_transport_xmit(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ skb_reset_mac_len(skb);
+ pskb_pull(skb, skb->mac_len + sizeof(struct iphdr) + x->props.header_len);
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ skb_reset_transport_header(skb);
+ skb->transport_header -= x->props.header_len;
+ }
+}
+
static struct xfrm_mode xfrm4_transport_mode = {
.input = xfrm4_transport_input,
.output = xfrm4_transport_output,
+ .gso_segment = xfrm4_transport_gso_segment,
+ .xmit = xfrm4_transport_xmit,
.owner = THIS_MODULE,
.encap = XFRM_MODE_TRANSPORT,
};
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 35feda676464..e6265e2c274e 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -33,6 +33,9 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *top_iph;
int flags;
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
offsetof(struct iphdr, protocol);
@@ -96,11 +99,36 @@ out:
return err;
}
+static struct sk_buff *xfrm4_mode_tunnel_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ __skb_push(skb, skb->mac_len);
+ return skb_mac_gso_segment(skb, features);
+
+}
+
+static void xfrm4_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ skb->network_header = skb->network_header - x->props.header_len;
+ skb->transport_header = skb->network_header +
+ sizeof(struct iphdr);
+ }
+
+ skb_reset_mac_len(skb);
+ pskb_pull(skb, skb->mac_len + x->props.header_len);
+}
+
static struct xfrm_mode xfrm4_tunnel_mode = {
.input2 = xfrm4_mode_tunnel_input,
.input = xfrm_prepare_input,
.output2 = xfrm4_mode_tunnel_output,
.output = xfrm4_prepare_output,
+ .gso_segment = xfrm4_mode_tunnel_gso_segment,
+ .xmit = xfrm4_mode_tunnel_xmit,
.owner = THIS_MODULE,
.encap = XFRM_MODE_TUNNEL,
.flags = XFRM_MODE_FLAG_TUNNEL,
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 7ee6518afa86..94b8702603bc 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -29,7 +29,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
goto out;
mtu = dst_mtu(skb_dst(skb));
- if (skb->len > mtu) {
+ if ((!skb_is_gso(skb) && skb->len > mtu) ||
+ (skb_is_gso(skb) && skb_gso_network_seglen(skb) > ip_skb_dst_mtu(skb->sk, skb))) {
skb->protocol = htons(ETH_P_IP);
if (skb->sk)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index e2afe677a9d9..48c452959d2c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -307,6 +307,7 @@ config IPV6_SEG6_LWTUNNEL
bool "IPv6: Segment Routing Header encapsulation support"
depends on IPV6
select LWTUNNEL
+ select DST_CACHE
---help---
Support for encapsulation of packets within an outer IPv6
header and a Segment Routing Header using the lightweight
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0ea96c4d334d..b09ac38d8dc4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -224,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.accept_ra_rtr_pref = 1,
.rtr_probe_interval = 60 * HZ,
#ifdef CONFIG_IPV6_ROUTE_INFO
+ .accept_ra_rt_info_min_plen = 0,
.accept_ra_rt_info_max_plen = 0,
#endif
#endif
@@ -245,6 +246,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
#endif
.enhanced_dad = 1,
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
+ .disable_policy = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -276,6 +278,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.accept_ra_rtr_pref = 1,
.rtr_probe_interval = 60 * HZ,
#ifdef CONFIG_IPV6_ROUTE_INFO
+ .accept_ra_rt_info_min_plen = 0,
.accept_ra_rt_info_max_plen = 0,
#endif
#endif
@@ -297,6 +300,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
#endif
.enhanced_dad = 1,
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
+ .disable_policy = 0,
};
/* Check if a valid qdisc is available */
@@ -545,6 +549,9 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
goto nla_put_failure;
+ if (!devconf)
+ goto out;
+
if ((all || type == NETCONFA_FORWARDING) &&
nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
goto nla_put_failure;
@@ -563,6 +570,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
devconf->ignore_routes_with_linkdown) < 0)
goto nla_put_failure;
+out:
nlmsg_end(skb, nlh);
return 0;
@@ -571,8 +579,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
- struct ipv6_devconf *devconf)
+void inet6_netconf_notify_devconf(struct net *net, int event, int type,
+ int ifindex, struct ipv6_devconf *devconf)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -582,7 +590,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
goto errout;
err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
- RTM_NEWNETCONF, 0, type);
+ event, 0, type);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
WARN_ON(err == -EMSGSIZE);
@@ -603,7 +611,8 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
};
static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
- struct nlmsghdr *nlh)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[NETCONFA_MAX+1];
@@ -616,7 +625,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
int err;
err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
- devconf_ipv6_policy);
+ devconf_ipv6_policy, extack);
if (err < 0)
goto errout;
@@ -765,7 +774,8 @@ static void dev_forward_change(struct inet6_dev *idev)
else
addrconf_leave_anycast(ifa);
}
- inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING,
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
dev->ifindex, &idev->cnf);
}
@@ -800,7 +810,8 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
if (p == &net->ipv6.devconf_dflt->forwarding) {
if ((!newf) ^ (!old))
- inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
rtnl_unlock();
@@ -812,13 +823,15 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
net->ipv6.devconf_dflt->forwarding = newf;
if ((!newf) ^ (!old_dflt))
- inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
addrconf_forward_change(net, newf);
if ((!newf) ^ (!old))
- inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
} else if ((!newf) ^ (!old))
@@ -843,6 +856,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
idev->cnf.ignore_routes_with_linkdown = newf;
if (changed)
inet6_netconf_notify_devconf(dev_net(dev),
+ RTM_NEWNETCONF,
NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
dev->ifindex,
&idev->cnf);
@@ -865,6 +879,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) {
if ((!newf) ^ (!old))
inet6_netconf_notify_devconf(net,
+ RTM_NEWNETCONF,
NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
@@ -877,6 +892,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
addrconf_linkdown_change(net, newf);
if ((!newf) ^ (!old))
inet6_netconf_notify_devconf(net,
+ RTM_NEWNETCONF,
NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
@@ -944,6 +960,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
const struct in6_addr *peer_addr, int pfxlen,
int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
{
+ struct net *net = dev_net(idev->dev);
struct inet6_ifaddr *ifa = NULL;
struct rt6_info *rt;
unsigned int hash;
@@ -990,6 +1007,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
goto out;
}
+ if (net->ipv6.devconf_all->disable_policy ||
+ idev->cnf.disable_policy)
+ rt->dst.flags |= DST_NOPOLICY;
+
neigh_parms_data_state_setall(idev->nd_parms);
ifa->addr = *addr;
@@ -2053,12 +2074,23 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
__ipv6_dev_ac_dec(ifp->idev, &addr);
}
-static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
+static int addrconf_ifid_6lowpan(u8 *eui, struct net_device *dev)
{
- if (dev->addr_len != EUI64_ADDR_LEN)
+ switch (dev->addr_len) {
+ case ETH_ALEN:
+ memcpy(eui, dev->dev_addr, 3);
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ memcpy(eui + 5, dev->dev_addr + 3, 3);
+ break;
+ case EUI64_ADDR_LEN:
+ memcpy(eui, dev->dev_addr, EUI64_ADDR_LEN);
+ eui[0] ^= 2;
+ break;
+ default:
return -1;
- memcpy(eui, dev->dev_addr, EUI64_ADDR_LEN);
- eui[0] ^= 2;
+ }
+
return 0;
}
@@ -2150,7 +2182,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
case ARPHRD_TUNNEL:
return addrconf_ifid_gre(eui, dev);
case ARPHRD_6LOWPAN:
- return addrconf_ifid_eui64(eui, dev);
+ return addrconf_ifid_6lowpan(eui, dev);
case ARPHRD_IEEE1394:
return addrconf_ifid_ieee1394(eui, dev);
case ARPHRD_TUNNEL6:
@@ -4392,7 +4424,8 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
};
static int
-inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
+inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
@@ -4401,7 +4434,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
u32 ifa_flags;
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
+ extack);
if (err < 0)
return err;
@@ -4501,7 +4535,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
}
static int
-inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
+inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
@@ -4513,7 +4548,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
u32 ifa_flags;
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
+ extack);
if (err < 0)
return err;
@@ -4863,7 +4899,8 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
return inet6_dump_addr(skb, cb, type);
}
-static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrmsg *ifm;
@@ -4874,7 +4911,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
struct sk_buff *skb;
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
+ extack);
if (err < 0)
goto errout;
@@ -4985,6 +5023,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_RTR_PROBE_INTERVAL] =
jiffies_to_msecs(cnf->rtr_probe_interval);
#ifdef CONFIG_IPV6_ROUTE_INFO
+ array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen;
array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
#endif
#endif
@@ -5016,6 +5055,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
#endif
array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
+ array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
}
static inline size_t inet6_ifla6_size(void)
@@ -5242,7 +5282,8 @@ static int inet6_validate_link_af(const struct net_device *dev,
if (dev && !__in6_dev_get(dev))
return -EAFNOSUPPORT;
- return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy);
+ return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy,
+ NULL);
}
static int check_addr_gen_mode(int mode)
@@ -5274,7 +5315,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
if (!idev)
return -EAFNOSUPPORT;
- if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
+ if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
BUG();
if (tb[IFLA_INET6_TOKEN]) {
@@ -5677,17 +5718,20 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
return restart_syscall();
if (valp == &net->ipv6.devconf_dflt->proxy_ndp)
- inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_PROXY_NEIGH,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
else if (valp == &net->ipv6.devconf_all->proxy_ndp)
- inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_PROXY_NEIGH,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
else {
struct inet6_dev *idev = ctl->extra1;
- inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_PROXY_NEIGH,
idev->dev->ifindex,
&idev->cnf);
}
@@ -5840,6 +5884,105 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
return ret;
}
+static
+void addrconf_set_nopolicy(struct rt6_info *rt, int action)
+{
+ if (rt) {
+ if (action)
+ rt->dst.flags |= DST_NOPOLICY;
+ else
+ rt->dst.flags &= ~DST_NOPOLICY;
+ }
+}
+
+static
+void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
+{
+ struct inet6_ifaddr *ifa;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ spin_lock(&ifa->lock);
+ if (ifa->rt) {
+ struct rt6_info *rt = ifa->rt;
+ struct fib6_table *table = rt->rt6i_table;
+ int cpu;
+
+ read_lock(&table->tb6_lock);
+ addrconf_set_nopolicy(ifa->rt, val);
+ if (rt->rt6i_pcpu) {
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **rtp;
+
+ rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+ addrconf_set_nopolicy(*rtp, val);
+ }
+ }
+ read_unlock(&table->tb6_lock);
+ }
+ spin_unlock(&ifa->lock);
+ }
+ read_unlock_bh(&idev->lock);
+}
+
+static
+int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val)
+{
+ struct inet6_dev *idev;
+ struct net *net;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ *valp = val;
+
+ net = (struct net *)ctl->extra2;
+ if (valp == &net->ipv6.devconf_dflt->disable_policy) {
+ rtnl_unlock();
+ return 0;
+ }
+
+ if (valp == &net->ipv6.devconf_all->disable_policy) {
+ struct net_device *dev;
+
+ for_each_netdev(net, dev) {
+ idev = __in6_dev_get(dev);
+ if (idev)
+ addrconf_disable_policy_idev(idev, val);
+ }
+ } else {
+ idev = (struct inet6_dev *)ctl->extra1;
+ addrconf_disable_policy_idev(idev, val);
+ }
+
+ rtnl_unlock();
+ return 0;
+}
+
+static
+int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int *valp = ctl->data;
+ int val = *valp;
+ loff_t pos = *ppos;
+ struct ctl_table lctl;
+ int ret;
+
+ lctl = *ctl;
+ lctl.data = &val;
+ ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
+
+ if (write && (*valp != val))
+ ret = addrconf_disable_policy(ctl, valp, val);
+
+ if (ret)
+ *ppos = pos;
+
+ return ret;
+}
+
static int minus_one = -1;
static const int one = 1;
static const int two_five_five = 255;
@@ -6028,6 +6171,13 @@ static const struct ctl_table addrconf_sysctl[] = {
},
#ifdef CONFIG_IPV6_ROUTE_INFO
{
+ .procname = "accept_ra_rt_info_min_plen",
+ .data = &ipv6_devconf.accept_ra_rt_info_min_plen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "accept_ra_rt_info_max_plen",
.data = &ipv6_devconf.accept_ra_rt_info_max_plen,
.maxlen = sizeof(int),
@@ -6198,6 +6348,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = addrconf_sysctl_addr_gen_mode,
},
{
+ .procname = "disable_policy",
+ .data = &ipv6_devconf.disable_policy,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_disable_policy,
+ },
+ {
/* sentinel */
}
};
@@ -6237,7 +6394,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
ifindex = NETCONFA_IFINDEX_DEFAULT;
else
ifindex = idev->dev->ifindex;
- inet6_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
+ ifindex, p);
return 0;
free:
@@ -6246,7 +6404,8 @@ out:
return -ENOBUFS;
}
-static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
+static void __addrconf_sysctl_unregister(struct net *net,
+ struct ipv6_devconf *p, int ifindex)
{
struct ctl_table *table;
@@ -6257,6 +6416,8 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
unregister_net_sysctl_table(p->sysctl_header);
p->sysctl_header = NULL;
kfree(table);
+
+ inet6_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
}
static int addrconf_sysctl_register(struct inet6_dev *idev)
@@ -6280,7 +6441,8 @@ static int addrconf_sysctl_register(struct inet6_dev *idev)
static void addrconf_sysctl_unregister(struct inet6_dev *idev)
{
- __addrconf_sysctl_unregister(&idev->cnf);
+ __addrconf_sysctl_unregister(dev_net(idev->dev), &idev->cnf,
+ idev->dev->ifindex);
neigh_sysctl_unregister(idev->nd_parms);
}
@@ -6323,7 +6485,7 @@ static int __net_init addrconf_init_net(struct net *net)
#ifdef CONFIG_SYSCTL
err_reg_dflt:
- __addrconf_sysctl_unregister(all);
+ __addrconf_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
kfree(dflt);
#endif
@@ -6336,8 +6498,10 @@ err_alloc_all:
static void __net_exit addrconf_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
- __addrconf_sysctl_unregister(net->ipv6.devconf_dflt);
- __addrconf_sysctl_unregister(net->ipv6.devconf_all);
+ __addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt,
+ NETCONFA_IFINDEX_DEFAULT);
+ __addrconf_sysctl_unregister(net, net->ipv6.devconf_all,
+ NETCONFA_IFINDEX_ALL);
#endif
kfree(net->ipv6.devconf_dflt);
kfree(net->ipv6.devconf_all);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index a8f6986dcbe5..07cd7d248bb6 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -404,7 +404,8 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
[IFAL_LABEL] = { .len = sizeof(u32), },
};
-static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct ifaddrlblmsg *ifal;
@@ -413,7 +414,8 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
u32 label;
int err = 0;
- err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy,
+ extack);
if (err < 0)
return err;
@@ -521,7 +523,8 @@ static inline int ip6addrlbl_msgsize(void)
+ nla_total_size(4); /* IFAL_LABEL */
}
-static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrlblmsg *ifal;
@@ -532,7 +535,8 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
struct ip6addrlbl_entry *p;
struct sk_buff *skb;
- err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy,
+ extack);
if (err < 0)
return err;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e82e59f22dfc..a88b5b5b7955 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1003,6 +1003,10 @@ static int __init inet6_init(void)
if (err)
goto seg6_fail;
+ err = igmp6_late_init();
+ if (err)
+ goto igmp6_late_err;
+
#ifdef CONFIG_SYSCTL
err = ipv6_sysctl_register();
if (err)
@@ -1017,8 +1021,10 @@ out:
#ifdef CONFIG_SYSCTL
sysctl_fail:
- seg6_exit();
+ igmp6_late_cleanup();
#endif
+igmp6_late_err:
+ seg6_exit();
seg6_fail:
calipso_exit();
calipso_fail:
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ff54faa75631..8b55abf1c45b 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -170,19 +170,23 @@ static void esp_output_restore_header(struct sk_buff *skb)
}
static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb,
+ struct xfrm_state *x,
struct ip_esp_hdr *esph,
__be32 *seqhi)
{
- struct xfrm_state *x = skb_dst(skb)->xfrm;
-
/* For ESN we move the header forward by 4 bytes to
* accomodate the high bits. We will move it back after
* encryption.
*/
if ((x->props.flags & XFRM_STATE_ESN)) {
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
*seqhi = esph->spi;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ if (xo)
+ esph->seq_no = htonl(xo->seq.hi);
+ else
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
}
esph->spi = x->id.spi;
@@ -214,61 +218,16 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
tail[plen - 1] = proto;
}
-static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
+int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
{
- int err;
- struct ip_esp_hdr *esph;
- struct crypto_aead *aead;
- struct aead_request *req;
- struct scatterlist *sg, *dsg;
- struct sk_buff *trailer;
- struct page *page;
- void *tmp;
- int blksize;
- int clen;
- int alen;
- int plen;
- int ivlen;
- int tfclen;
- int nfrags;
- int assoclen;
- int seqhilen;
- int tailen;
- u8 *iv;
u8 *tail;
u8 *vaddr;
- __be32 *seqhi;
- __be64 seqno;
- __u8 proto = *skb_mac_header(skb);
-
- /* skb is pure payload to encrypt */
- aead = x->data;
- alen = crypto_aead_authsize(aead);
- ivlen = crypto_aead_ivsize(aead);
-
- tfclen = 0;
- if (x->tfcpad) {
- struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
- u32 padto;
-
- padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached));
- if (skb->len < padto)
- tfclen = padto - skb->len;
- }
- blksize = ALIGN(crypto_aead_blocksize(aead), 4);
- clen = ALIGN(skb->len + 2 + tfclen, blksize);
- plen = clen - skb->len - tfclen;
- tailen = tfclen + plen + alen;
-
- assoclen = sizeof(*esph);
- seqhilen = 0;
-
- if (x->props.flags & XFRM_STATE_ESN) {
- seqhilen += sizeof(__be32);
- assoclen += seqhilen;
- }
+ int nfrags;
+ struct page *page;
+ struct ip_esp_hdr *esph;
+ struct sk_buff *trailer;
+ int tailen = esp->tailen;
- *skb_mac_header(skb) = IPPROTO_ESP;
esph = ip_esp_hdr(skb);
if (!skb_cloned(skb)) {
@@ -284,6 +243,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
struct sock *sk = skb->sk;
struct page_frag *pfrag = &x->xfrag;
+ esp->inplace = false;
+
allocsize = ALIGN(tailen, L1_CACHE_BYTES);
spin_lock_bh(&x->lock);
@@ -300,10 +261,12 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
tail = vaddr + pfrag->offset;
- esp_output_fill_trailer(tail, tfclen, plen, proto);
+ esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
kunmap_atomic(vaddr);
+ spin_unlock_bh(&x->lock);
+
nfrags = skb_shinfo(skb)->nr_frags;
__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -319,77 +282,56 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
if (sk)
atomic_add(tailen, &sk->sk_wmem_alloc);
- skb_push(skb, -skb_network_offset(skb));
-
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- esph->spi = x->id.spi;
-
- tmp = esp_alloc_tmp(aead, nfrags + 2, seqhilen);
- if (!tmp) {
- spin_unlock_bh(&x->lock);
- err = -ENOMEM;
- goto error;
- }
- seqhi = esp_tmp_seqhi(tmp);
- iv = esp_tmp_iv(aead, tmp, seqhilen);
- req = esp_tmp_req(aead, iv);
- sg = esp_req_sg(aead, req);
- dsg = &sg[nfrags];
-
- esph = esp_output_set_esn(skb, esph, seqhi);
-
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg,
- (unsigned char *)esph - skb->data,
- assoclen + ivlen + clen + alen);
-
- allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
-
- if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
- spin_unlock_bh(&x->lock);
- err = -ENOMEM;
- goto error;
- }
-
- skb_shinfo(skb)->nr_frags = 1;
-
- page = pfrag->page;
- get_page(page);
- /* replace page frags in skb with new page */
- __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
- pfrag->offset = pfrag->offset + allocsize;
-
- sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
- skb_to_sgvec(skb, dsg,
- (unsigned char *)esph - skb->data,
- assoclen + ivlen + clen + alen);
-
- spin_unlock_bh(&x->lock);
-
- goto skip_cow2;
+ goto out;
}
}
cow:
- err = skb_cow_data(skb, tailen, &trailer);
- if (err < 0)
- goto error;
- nfrags = err;
-
+ nfrags = skb_cow_data(skb, tailen, &trailer);
+ if (nfrags < 0)
+ goto out;
tail = skb_tail_pointer(trailer);
- esph = ip_esp_hdr(skb);
skip_cow:
- esp_output_fill_trailer(tail, tfclen, plen, proto);
+ esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
+ pskb_put(skb, trailer, tailen);
- pskb_put(skb, trailer, clen - skb->len + alen);
- skb_push(skb, -skb_network_offset(skb));
+out:
+ return nfrags;
+}
+EXPORT_SYMBOL_GPL(esp6_output_head);
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- esph->spi = x->id.spi;
+int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
+{
+ u8 *iv;
+ int alen;
+ void *tmp;
+ int ivlen;
+ int assoclen;
+ int seqhilen;
+ __be32 *seqhi;
+ struct page *page;
+ struct ip_esp_hdr *esph;
+ struct aead_request *req;
+ struct crypto_aead *aead;
+ struct scatterlist *sg, *dsg;
+ int err = -ENOMEM;
- tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
+ assoclen = sizeof(struct ip_esp_hdr);
+ seqhilen = 0;
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ seqhilen += sizeof(__be32);
+ assoclen += sizeof(__be32);
+ }
+
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+ ivlen = crypto_aead_ivsize(aead);
+
+ tmp = esp_alloc_tmp(aead, esp->nfrags + 2, seqhilen);
if (!tmp) {
+ spin_unlock_bh(&x->lock);
err = -ENOMEM;
goto error;
}
@@ -398,29 +340,57 @@ skip_cow:
iv = esp_tmp_iv(aead, tmp, seqhilen);
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
- dsg = sg;
- esph = esp_output_set_esn(skb, esph, seqhi);
+ if (esp->inplace)
+ dsg = sg;
+ else
+ dsg = &sg[esp->nfrags];
- sg_init_table(sg, nfrags);
+ esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi);
+
+ sg_init_table(sg, esp->nfrags);
skb_to_sgvec(skb, sg,
(unsigned char *)esph - skb->data,
- assoclen + ivlen + clen + alen);
+ assoclen + ivlen + esp->clen + alen);
+
+ if (!esp->inplace) {
+ int allocsize;
+ struct page_frag *pfrag = &x->xfrag;
+
+ allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
+
+ spin_lock_bh(&x->lock);
+ if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+ spin_unlock_bh(&x->lock);
+ err = -ENOMEM;
+ goto error;
+ }
+
+ skb_shinfo(skb)->nr_frags = 1;
+
+ page = pfrag->page;
+ get_page(page);
+ /* replace page frags in skb with new page */
+ __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
+ pfrag->offset = pfrag->offset + allocsize;
+ spin_unlock_bh(&x->lock);
+
+ sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
+ skb_to_sgvec(skb, dsg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + esp->clen + alen);
+ }
-skip_cow2:
if ((x->props.flags & XFRM_STATE_ESN))
aead_request_set_callback(req, 0, esp_output_done_esn, skb);
else
aead_request_set_callback(req, 0, esp_output_done, skb);
- aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv);
+ aead_request_set_crypt(req, sg, dsg, ivlen + esp->clen, iv);
aead_request_set_ad(req, assoclen);
- seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
- ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
-
memset(iv, 0, ivlen);
- memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8),
+ memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&esp->seqno + 8 - min(ivlen, 8),
min(ivlen, 8));
ESP_SKB_CB(skb)->tmp = tmp;
@@ -446,10 +416,60 @@ skip_cow2:
error:
return err;
}
+EXPORT_SYMBOL_GPL(esp6_output_tail);
+
+static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int alen;
+ int blksize;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct esp_info esp;
+
+ esp.inplace = true;
+
+ esp.proto = *skb_mac_header(skb);
+ *skb_mac_header(skb) = IPPROTO_ESP;
+
+ /* skb is pure payload to encrypt */
+
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+
+ esp.tfclen = 0;
+ if (x->tfcpad) {
+ struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+ u32 padto;
+
+ padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached));
+ if (skb->len < padto)
+ esp.tfclen = padto - skb->len;
+ }
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize);
+ esp.plen = esp.clen - skb->len - esp.tfclen;
+ esp.tailen = esp.tfclen + esp.plen + alen;
+
+ esp.nfrags = esp6_output_head(x, skb, &esp);
+ if (esp.nfrags < 0)
+ return esp.nfrags;
+
+ esph = ip_esp_hdr(skb);
+ esph->spi = x->id.spi;
+
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ esp.seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
+ ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
+
+ skb_push(skb, -skb_network_offset(skb));
+
+ return esp6_output_tail(x, skb, &esp);
+}
-static int esp_input_done2(struct sk_buff *skb, int err)
+int esp6_input_done2(struct sk_buff *skb, int err)
{
struct xfrm_state *x = xfrm_input_state(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
int alen = crypto_aead_authsize(aead);
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
@@ -458,7 +478,8 @@ static int esp_input_done2(struct sk_buff *skb, int err)
int padlen;
u8 nexthdr[2];
- kfree(ESP_SKB_CB(skb)->tmp);
+ if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
+ kfree(ESP_SKB_CB(skb)->tmp);
if (unlikely(err))
goto out;
@@ -492,12 +513,13 @@ static int esp_input_done2(struct sk_buff *skb, int err)
out:
return err;
}
+EXPORT_SYMBOL_GPL(esp6_input_done2);
static void esp_input_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
- xfrm_input_resume(skb, esp_input_done2(skb, err));
+ xfrm_input_resume(skb, esp6_input_done2(skb, err));
}
static void esp_input_restore_header(struct sk_buff *skb)
@@ -619,7 +641,7 @@ skip_cow:
if ((x->props.flags & XFRM_STATE_ESN))
esp_input_restore_header(skb);
- ret = esp_input_done2(skb, ret);
+ ret = esp6_input_done2(skb, ret);
out:
return ret;
@@ -682,13 +704,17 @@ static int esp_init_aead(struct xfrm_state *x)
char aead_name[CRYPTO_MAX_ALG_NAME];
struct crypto_aead *aead;
int err;
+ u32 mask = 0;
err = -ENAMETOOLONG;
if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
goto error;
- aead = crypto_alloc_aead(aead_name, 0, 0);
+ if (x->xso.offload_handle)
+ mask |= CRYPTO_ALG_ASYNC;
+
+ aead = crypto_alloc_aead(aead_name, 0, mask);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -718,6 +744,7 @@ static int esp_init_authenc(struct xfrm_state *x)
char authenc_name[CRYPTO_MAX_ALG_NAME];
unsigned int keylen;
int err;
+ u32 mask = 0;
err = -EINVAL;
if (!x->ealg)
@@ -743,7 +770,10 @@ static int esp_init_authenc(struct xfrm_state *x)
goto error;
}
- aead = crypto_alloc_aead(authenc_name, 0, 0);
+ if (x->xso.offload_handle)
+ mask |= CRYPTO_ALG_ASYNC;
+
+ aead = crypto_alloc_aead(authenc_name, 0, mask);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index d914eb93204a..d950d43ba255 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -45,27 +45,31 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
goto out;
- err = secpath_set(skb);
- if (err)
- goto out;
+ xo = xfrm_offload(skb);
+ if (!xo || !(xo->flags & CRYPTO_DONE)) {
+ err = secpath_set(skb);
+ if (err)
+ goto out;
- if (skb->sp->len == XFRM_MAX_DEPTH)
- goto out;
+ if (skb->sp->len == XFRM_MAX_DEPTH)
+ goto out;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET6);
- if (!x)
- goto out;
+ x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET6);
+ if (!x)
+ goto out;
- skb->sp->xvec[skb->sp->len++] = x;
- skb->sp->olen++;
+ skb->sp->xvec[skb->sp->len++] = x;
+ skb->sp->olen++;
- xo = xfrm_offload(skb);
- if (!xo) {
- xfrm_state_put(x);
- goto out;
+ xo = xfrm_offload(skb);
+ if (!xo) {
+ xfrm_state_put(x);
+ goto out;
+ }
}
+
xo->flags |= XFRM_GRO;
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
@@ -86,19 +90,216 @@ out:
return NULL;
}
+static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ip_esp_hdr *esph;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ int proto = iph->nexthdr;
+
+ skb_push(skb, -skb_network_offset(skb));
+ esph = ip_esp_hdr(skb);
+ *skb_mac_header(skb) = IPPROTO_ESP;
+
+ esph->spi = x->id.spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+
+ xo->proto = proto;
+}
+
+static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ __u32 seq;
+ int err = 0;
+ struct sk_buff *skb2;
+ struct xfrm_state *x;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ netdev_features_t esp_features = features;
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (!xo)
+ goto out;
+
+ seq = xo->seq.low;
+
+ x = skb->sp->xvec[skb->sp->len - 1];
+ aead = x->data;
+ esph = ip_esp_hdr(skb);
+
+ if (esph->spi != x->id.spi)
+ goto out;
+
+ if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
+ goto out;
+
+ __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
+
+ skb->encap_hdr_csum = 1;
+
+ if (!(features & NETIF_F_HW_ESP))
+ esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+
+ segs = x->outer_mode->gso_segment(x, skb, esp_features);
+ if (IS_ERR_OR_NULL(segs))
+ goto out;
+
+ __skb_pull(skb, skb->data - skb_mac_header(skb));
+
+ skb2 = segs;
+ do {
+ struct sk_buff *nskb = skb2->next;
+
+ xo = xfrm_offload(skb2);
+ xo->flags |= XFRM_GSO_SEGMENT;
+ xo->seq.low = seq;
+ xo->seq.hi = xfrm_replay_seqhi(x, seq);
+
+ if(!(features & NETIF_F_HW_ESP))
+ xo->flags |= CRYPTO_FALLBACK;
+
+ x->outer_mode->xmit(x, skb2);
+
+ err = x->type_offload->xmit(x, skb2, esp_features);
+ if (err) {
+ kfree_skb_list(segs);
+ return ERR_PTR(err);
+ }
+
+ if (!skb_is_gso(skb2))
+ seq++;
+ else
+ seq += skb_shinfo(skb2)->gso_segs;
+
+ skb_push(skb2, skb2->mac_len);
+ skb2 = nskb;
+ } while (skb2);
+
+out:
+ return segs;
+}
+
+static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct crypto_aead *aead = x->data;
+
+ if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
+ return -EINVAL;
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ return esp6_input_done2(skb, 0);
+}
+
+static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features)
+{
+ int err;
+ int alen;
+ int blksize;
+ struct xfrm_offload *xo;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct esp_info esp;
+ bool hw_offload = true;
+
+ esp.inplace = true;
+
+ xo = xfrm_offload(skb);
+
+ if (!xo)
+ return -EINVAL;
+
+ if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
+ (x->xso.dev != skb->dev)) {
+ xo->flags |= CRYPTO_FALLBACK;
+ hw_offload = false;
+ }
+
+ esp.proto = xo->proto;
+
+ /* skb is pure payload to encrypt */
+
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+
+ esp.tfclen = 0;
+ /* XXX: Add support for tfc padding here. */
+
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize);
+ esp.plen = esp.clen - skb->len - esp.tfclen;
+ esp.tailen = esp.tfclen + esp.plen + alen;
+
+ if (!hw_offload || (hw_offload && !skb_is_gso(skb))) {
+ esp.nfrags = esp6_output_head(x, skb, &esp);
+ if (esp.nfrags < 0)
+ return esp.nfrags;
+ }
+
+ esph = ip_esp_hdr(skb);
+ esph->spi = x->id.spi;
+
+ skb_push(skb, -skb_network_offset(skb));
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ esph->seq_no = htonl(xo->seq.low);
+ } else {
+ int len;
+
+ len = skb->len - sizeof(struct ipv6hdr);
+ if (len > IPV6_MAXPLEN)
+ len = 0;
+
+ ipv6_hdr(skb)->payload_len = htons(len);
+ }
+
+ if (hw_offload)
+ return 0;
+
+ esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+
+ err = esp6_output_tail(x, skb, &esp);
+ if (err < 0)
+ return err;
+
+ secpath_reset(skb);
+
+ return 0;
+}
+
static const struct net_offload esp6_offload = {
.callbacks = {
.gro_receive = esp6_gro_receive,
+ .gso_segment = esp6_gso_segment,
},
};
+static const struct xfrm_type_offload esp6_type_offload = {
+ .description = "ESP6 OFFLOAD",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_ESP,
+ .input_tail = esp6_input_tail,
+ .xmit = esp6_xmit,
+ .encap = esp6_gso_encap,
+};
+
static int __init esp6_offload_init(void)
{
+ if (xfrm_register_type_offload(&esp6_type_offload, AF_INET6) < 0) {
+ pr_info("%s: can't add xfrm type offload\n", __func__);
+ return -EAGAIN;
+ }
+
return inet6_add_offload(&esp6_offload, IPPROTO_ESP);
}
static void __exit esp6_offload_exit(void)
{
+ if (xfrm_unregister_type_offload(&esp6_type_offload, AF_INET6) < 0)
+ pr_info("%s: can't remove xfrm type offload\n", __func__);
+
inet6_del_offload(&esp6_offload, IPPROTO_ESP);
}
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index ce1aae4a7fc8..b3df03e3faa0 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -146,8 +146,7 @@ static int ila_build_state(struct nlattr *nla,
return -EINVAL;
}
- ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla,
- ila_nl_policy);
+ ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, NULL);
if (ret < 0)
return ret;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 6fcb7cb49bb2..8d128ba79b66 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -544,6 +544,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
& IPV6_TCLASS_MASK;
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
+ else
+ fl6.flowi6_mark = t->parms.fwmark;
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
@@ -603,6 +605,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
fl6.flowlabel |= ip6_flowlabel(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
+ else
+ fl6.flowi6_mark = t->parms.fwmark;
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
@@ -780,6 +784,7 @@ static int ip6gre_tnl_change(struct ip6_tnl *t,
t->parms.o_key = p->o_key;
t->parms.i_flags = p->i_flags;
t->parms.o_flags = p->o_flags;
+ t->parms.fwmark = p->fwmark;
dst_cache_reset(&t->dst_cache);
ip6gre_tnl_link_config(t, set_mtu);
return 0;
@@ -1249,6 +1254,9 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_FLAGS])
parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
+
+ if (data[IFLA_GRE_FWMARK])
+ parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
}
static int ip6gre_tap_init(struct net_device *dev)
@@ -1470,6 +1478,8 @@ static size_t ip6gre_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_GRE_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_GRE_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -1490,7 +1500,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
- nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
+ nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
+ nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -1525,6 +1536,7 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
};
static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index c45b12b4431c..9ee208a348f5 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,6 +49,8 @@
int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ void (*edemux)(struct sk_buff *skb);
+
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
@@ -60,8 +62,8 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct inet6_protocol *ipprot;
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && ipprot->early_demux)
- ipprot->early_demux(skb);
+ if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
+ edemux(skb);
}
if (!skb_valid_dst(skb))
ip6_route_input(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a9692ec0cd6d..8a1bd52423ca 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1258,6 +1258,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
& IPV6_TCLASS_MASK;
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
+ else
+ fl6.flowi6_mark = t->parms.fwmark;
}
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
@@ -1340,6 +1342,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowlabel |= ip6_flowlabel(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
+ else
+ fl6.flowi6_mark = t->parms.fwmark;
}
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
@@ -1469,6 +1473,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
t->parms.flowinfo = p->flowinfo;
t->parms.link = p->link;
t->parms.proto = p->proto;
+ t->parms.fwmark = p->fwmark;
dst_cache_reset(&t->dst_cache);
ip6_tnl_link_config(t);
return 0;
@@ -1920,6 +1925,9 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_COLLECT_METADATA])
parms->collect_md = true;
+
+ if (data[IFLA_IPTUN_FWMARK])
+ parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}
static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
@@ -2056,6 +2064,8 @@ static size_t ip6_tnl_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_COLLECT_METADATA */
nla_total_size(0) +
+ /* IFLA_IPTUN_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -2071,7 +2081,8 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
- nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
+ nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) ||
+ nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
@@ -2083,6 +2094,7 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (parm->collect_md)
if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -2111,6 +2123,7 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
+ [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
};
static struct rtnl_link_ops ip6_link_ops __read_mostly = {
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 3d8a3b63b4fd..d67ef56454b2 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -657,6 +657,7 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
t->parms.i_key = p->i_key;
t->parms.o_key = p->o_key;
t->parms.proto = p->proto;
+ t->parms.fwmark = p->fwmark;
dst_cache_reset(&t->dst_cache);
vti6_link_config(t);
return 0;
@@ -933,6 +934,9 @@ static void vti6_netlink_parms(struct nlattr *data[],
if (data[IFLA_VTI_OKEY])
parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
+
+ if (data[IFLA_VTI_FWMARK])
+ parms->fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]);
}
static int vti6_newlink(struct net *src_net, struct net_device *dev,
@@ -998,6 +1002,8 @@ static size_t vti6_get_size(const struct net_device *dev)
nla_total_size(4) +
/* IFLA_VTI_OKEY */
nla_total_size(4) +
+ /* IFLA_VTI_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -1010,7 +1016,8 @@ static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_in6_addr(skb, IFLA_VTI_LOCAL, &parm->laddr) ||
nla_put_in6_addr(skb, IFLA_VTI_REMOTE, &parm->raddr) ||
nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) ||
- nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key))
+ nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key) ||
+ nla_put_u32(skb, IFLA_VTI_FWMARK, parm->fwmark))
goto nla_put_failure;
return 0;
@@ -1024,6 +1031,7 @@ static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = {
[IFLA_VTI_REMOTE] = { .len = sizeof(struct in6_addr) },
[IFLA_VTI_IKEY] = { .type = NLA_U32 },
[IFLA_VTI_OKEY] = { .type = NLA_U32 },
+ [IFLA_VTI_FWMARK] = { .type = NLA_U32 },
};
static struct rtnl_link_ops vti6_link_ops __read_mostly = {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index bf34d0950752..374997d26488 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -816,7 +816,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
in6_dev->cnf.mc_forwarding--;
- inet6_netconf_notify_devconf(dev_net(dev),
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
}
@@ -975,7 +975,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
in6_dev->cnf.mc_forwarding++;
- inet6_netconf_notify_devconf(dev_net(dev),
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
}
@@ -1598,7 +1598,8 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
write_unlock_bh(&mrt_lock);
if (!err)
- inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
rtnl_unlock();
@@ -1619,7 +1620,7 @@ int ip6mr_sk_done(struct sock *sk)
mrt->mroute6_sk = NULL;
net->ipv6.devconf_all->mc_forwarding--;
write_unlock_bh(&mrt_lock);
- inet6_netconf_notify_devconf(net,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 1bdc703cb966..07403fa164e1 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2463,7 +2463,6 @@ static void mld_ifc_event(struct inet6_dev *idev)
mld_ifc_start_timer(idev, 1);
}
-
static void igmp6_timer_handler(unsigned long data)
{
struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
@@ -2599,6 +2598,44 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
write_unlock_bh(&idev->lock);
}
+static void ipv6_mc_rejoin_groups(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *pmc;
+
+ ASSERT_RTNL();
+
+ if (mld_in_v1_mode(idev)) {
+ read_lock_bh(&idev->lock);
+ for (pmc = idev->mc_list; pmc; pmc = pmc->next)
+ igmp6_join_group(pmc);
+ read_unlock_bh(&idev->lock);
+ } else
+ mld_send_report(idev, NULL);
+}
+
+static int ipv6_mc_netdev_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ switch (event) {
+ case NETDEV_RESEND_IGMP:
+ if (idev)
+ ipv6_mc_rejoin_groups(idev);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block igmp6_netdev_notifier = {
+ .notifier_call = ipv6_mc_netdev_event,
+};
+
#ifdef CONFIG_PROC_FS
struct igmp6_mc_iter_state {
struct seq_net_private p;
@@ -2970,7 +3007,17 @@ int __init igmp6_init(void)
return register_pernet_subsys(&igmp6_net_ops);
}
+int __init igmp6_late_init(void)
+{
+ return register_netdevice_notifier(&igmp6_netdev_notifier);
+}
+
void igmp6_cleanup(void)
{
unregister_pernet_subsys(&igmp6_net_ops);
}
+
+void igmp6_late_cleanup(void)
+{
+ unregister_netdevice_notifier(&igmp6_netdev_notifier);
+}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index cb1766724a4c..d310dc41209a 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -732,7 +732,7 @@ void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
struct ndisc_options *ndopts)
{
- neigh_update(neigh, lladdr, new, flags);
+ neigh_update(neigh, lladdr, new, flags, 0);
/* report ndisc ops about neighbour update */
ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
}
@@ -1418,6 +1418,8 @@ skip_linkparms:
if (ri->prefix_len == 0 &&
!in6_dev->cnf.accept_ra_defrtr)
continue;
+ if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
+ continue;
if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
continue;
rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
@@ -1746,6 +1748,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
case NETDEV_CHANGEADDR:
neigh_changeaddr(&nd_tbl, dev);
fib6_run_gc(0, net, false);
+ /* fallthrough */
+ case NETDEV_UP:
idev = in6_dev_get(dev);
if (!idev)
break;
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 765facf03d45..e8d88d82636b 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -159,7 +159,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
- nft_fib_store_result(dest, priv->result, pkt,
+ nft_fib_store_result(dest, priv, pkt,
nft_in(pkt)->ifindex);
return;
}
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index e3770abe688a..b5d54d4f995c 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -26,7 +26,7 @@
#include <net/protocol.h>
#if IS_ENABLED(CONFIG_IPV6)
-const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
EXPORT_SYMBOL(inet6_protos);
int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fb174b590fd3..a1bf426c959b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2910,7 +2910,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
unsigned int pref;
int err;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
+ NULL);
if (err < 0)
goto errout;
@@ -3263,7 +3264,8 @@ static int ip6_route_multipath_del(struct fib6_config *cfg)
return last_err;
}
-static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct fib6_config cfg;
int err;
@@ -3280,7 +3282,8 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
}
}
-static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct fib6_config cfg;
int err;
@@ -3568,7 +3571,8 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
NLM_F_MULTI);
}
-static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[RTA_MAX+1];
@@ -3578,7 +3582,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
struct flowi6 fl6;
int err, iif = 0, oif = 0;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
+ extack);
if (err < 0)
goto errout;
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 85582257d3af..6a495490d43e 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -26,17 +26,13 @@
#include <linux/seg6_iptunnel.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
-#ifdef CONFIG_DST_CACHE
#include <net/dst_cache.h>
-#endif
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
struct seg6_lwt {
-#ifdef CONFIG_DST_CACHE
struct dst_cache cache;
-#endif
struct seg6_iptunnel_encap tuninfo[0];
};
@@ -105,7 +101,7 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
- err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC);
+ err = skb_cow_head(skb, tot_len);
if (unlikely(err))
return err;
@@ -156,7 +152,7 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
- err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC);
+ err = skb_cow_head(skb, hdrlen);
if (unlikely(err))
return err;
@@ -237,6 +233,9 @@ static int seg6_do_srh(struct sk_buff *skb)
static int seg6_input(struct sk_buff *skb)
{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct dst_entry *dst = NULL;
+ struct seg6_lwt *slwt;
int err;
err = seg6_do_srh(skb);
@@ -245,8 +244,30 @@ static int seg6_input(struct sk_buff *skb)
return err;
}
+ slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+
+ preempt_disable();
+ dst = dst_cache_get(&slwt->cache);
+ preempt_enable();
+
skb_dst_drop(skb);
- ip6_route_input(skb);
+
+ if (!dst) {
+ ip6_route_input(skb);
+ dst = skb_dst(skb);
+ if (!dst->error) {
+ preempt_disable();
+ dst_cache_set_ip6(&slwt->cache, dst,
+ &ipv6_hdr(skb)->saddr);
+ preempt_enable();
+ }
+ } else {
+ skb_dst_set(skb, dst);
+ }
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ return err;
return dst_input(skb);
}
@@ -264,11 +285,9 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
-#ifdef CONFIG_DST_CACHE
preempt_disable();
dst = dst_cache_get(&slwt->cache);
preempt_enable();
-#endif
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -287,16 +306,18 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
-#ifdef CONFIG_DST_CACHE
preempt_disable();
dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
preempt_enable();
-#endif
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
+
return dst_output(net, sk, skb);
drop:
kfree_skb(skb);
@@ -315,7 +336,7 @@ static int seg6_build_state(struct nlattr *nla,
int err;
err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
- seg6_iptunnel_policy);
+ seg6_iptunnel_policy, NULL);
if (err < 0)
return err;
@@ -355,13 +376,11 @@ static int seg6_build_state(struct nlattr *nla,
slwt = seg6_lwt_lwtunnel(newts);
-#ifdef CONFIG_DST_CACHE
err = dst_cache_init(&slwt->cache, GFP_KERNEL);
if (err) {
kfree(newts);
return err;
}
-#endif
memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
@@ -375,12 +394,10 @@ static int seg6_build_state(struct nlattr *nla,
return 0;
}
-#ifdef CONFIG_DST_CACHE
static void seg6_destroy_state(struct lwtunnel_state *lwt)
{
dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
}
-#endif
static int seg6_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
@@ -414,9 +431,7 @@ static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
static const struct lwtunnel_encap_ops seg6_iptun_ops = {
.build_state = seg6_build_state,
-#ifdef CONFIG_DST_CACHE
.destroy_state = seg6_destroy_state,
-#endif
.output = seg6_output,
.input = seg6_input,
.fill_encap = seg6_fill_encap_info,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 99853c6e33a8..61e5902f0687 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -881,11 +881,12 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
- rt = ip_route_output_ports(tunnel->net, &fl4, NULL,
- dst, tiph->saddr,
- 0, 0,
- IPPROTO_IPV6, RT_TOS(tos),
- tunnel->parms.link);
+ flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark,
+ RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
+ 0, dst, tiph->saddr, 0, 0,
+ sock_net_uid(tunnel->net, NULL));
+ rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
+
if (IS_ERR(rt)) {
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
@@ -1071,7 +1072,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
}
}
-static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
+static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
+ __u32 fwmark)
{
struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
@@ -1085,8 +1087,9 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
ipip6_tunnel_link(sitn, t);
t->parms.iph.ttl = p->iph.ttl;
t->parms.iph.tos = p->iph.tos;
- if (t->parms.link != p->link) {
+ if (t->parms.link != p->link || t->fwmark != fwmark) {
t->parms.link = p->link;
+ t->fwmark = fwmark;
ipip6_tunnel_bind_dev(t->dev);
}
dst_cache_reset(&t->dst_cache);
@@ -1220,7 +1223,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
t = netdev_priv(dev);
}
- ipip6_tunnel_update(t, &p);
+ ipip6_tunnel_update(t, &p, t->fwmark);
}
if (t) {
@@ -1418,7 +1421,8 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
}
static void ipip6_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm *parms,
+ __u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -1457,6 +1461,8 @@ static void ipip6_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_PROTO])
parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+ if (data[IFLA_IPTUN_FWMARK])
+ *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}
/* This function returns true when ENCAP attributes are present in the nl msg */
@@ -1549,7 +1555,7 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
return err;
}
- ipip6_netlink_parms(data, &nt->parms);
+ ipip6_netlink_parms(data, &nt->parms, &nt->fwmark);
if (ipip6_tunnel_locate(net, &nt->parms, 0))
return -EEXIST;
@@ -1577,6 +1583,7 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
#endif
+ __u32 fwmark = t->fwmark;
int err;
if (dev == sitn->fb_tunnel_dev)
@@ -1588,7 +1595,7 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
return err;
}
- ipip6_netlink_parms(data, &p);
+ ipip6_netlink_parms(data, &p, &fwmark);
if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
(!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
@@ -1602,7 +1609,7 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
} else
t = netdev_priv(dev);
- ipip6_tunnel_update(t, &p);
+ ipip6_tunnel_update(t, &p, fwmark);
#ifdef CONFIG_IPV6_SIT_6RD
if (ipip6_netlink_6rd_parms(data, &ip6rd))
@@ -1649,6 +1656,8 @@ static size_t ipip6_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_IPTUN_FWMARK */
+ nla_total_size(4) +
0;
}
@@ -1665,7 +1674,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))) ||
nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
- nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
+ nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags) ||
+ nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
goto nla_put_failure;
#ifdef CONFIG_IPV6_SIT_6RD
@@ -1715,6 +1725,7 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
};
static void ipip6_dellink(struct net_device *dev, struct list_head *head)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 49fa2e8c3fa9..8e42e8f54b70 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -101,12 +101,12 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
}
}
-static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff)
+static u32 tcp_v6_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff)
{
- return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
- ipv6_hdr(skb)->saddr.s6_addr32,
- tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source, tsoff);
+ return secure_tcpv6_seq_and_tsoff(ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source, tsoff);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -265,11 +265,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_gso_type = SKB_GSO_TCPV6;
ip6_dst_store(sk, dst, NULL, NULL);
- if (tcp_death_row->sysctl_tw_recycle &&
- !tp->rx_opt.ts_recent_stamp &&
- ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
- tcp_fetch_timewait_stamp(sk, dst);
-
icsk->icsk_ext_hdr_len = 0;
if (opt)
icsk->icsk_ext_hdr_len = opt->opt_flen +
@@ -287,11 +282,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk_set_txhash(sk);
if (likely(!tp->repair)) {
- seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32,
- inet->inet_sport,
- inet->inet_dport,
- &tp->tsoffset);
+ seq = secure_tcpv6_seq_and_tsoff(np->saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
+ inet->inet_sport,
+ inet->inet_dport,
+ &tp->tsoffset);
if (!tp->write_seq)
tp->write_seq = seq;
}
@@ -727,11 +722,8 @@ static void tcp_v6_init_req(struct request_sock *req,
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
struct flowi *fl,
- const struct request_sock *req,
- bool *strict)
+ const struct request_sock *req)
{
- if (strict)
- *strict = true;
return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}
@@ -757,7 +749,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.cookie_init_seq = cookie_v6_init_sequence,
#endif
.route_req = tcp_v6_route_req,
- .init_seq = tcp_v6_init_sequence,
+ .init_seq_tsoff = tcp_v6_init_seq_and_tsoff,
.send_synack = tcp_v6_send_synack,
};
@@ -1301,8 +1293,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
goto discard;
if (nsk != sk) {
- sock_rps_save_rxhash(nsk, skb);
- sk_mark_napi_id(nsk, skb);
if (tcp_child_process(sk, nsk, skb))
goto reset;
if (opt_skb)
@@ -1933,8 +1923,9 @@ struct proto tcpv6_prot = {
.diag_destroy = tcp_abort,
};
-static const struct inet6_protocol tcpv6_protocol = {
+static struct inet6_protocol tcpv6_protocol = {
.early_demux = tcp_v6_early_demux,
+ .early_demux_handler = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e28082f0a307..04862abfe4ec 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -46,6 +46,7 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
+#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/busy_poll.h>
#include <net/sock_reuseport.h>
@@ -864,6 +865,69 @@ discard:
return 0;
}
+
+static struct sock *__udp6_lib_demux_lookup(struct net *net,
+ __be16 loc_port, const struct in6_addr *loc_addr,
+ __be16 rmt_port, const struct in6_addr *rmt_addr,
+ int dif)
+{
+ unsigned short hnum = ntohs(loc_port);
+ unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
+ unsigned int slot2 = hash2 & udp_table.mask;
+ struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
+ const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
+ struct sock *sk;
+
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
+ if (INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+ return sk;
+ /* Only check first socket in chain */
+ break;
+ }
+ return NULL;
+}
+
+static void udp_v6_early_demux(struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ const struct udphdr *uh;
+ struct sock *sk;
+ struct dst_entry *dst;
+ int dif = skb->dev->ifindex;
+
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) +
+ sizeof(struct udphdr)))
+ return;
+
+ uh = udp_hdr(skb);
+
+ if (skb->pkt_type == PACKET_HOST)
+ sk = __udp6_lib_demux_lookup(net, uh->dest,
+ &ipv6_hdr(skb)->daddr,
+ uh->source, &ipv6_hdr(skb)->saddr,
+ dif);
+ else
+ return;
+
+ if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2))
+ return;
+
+ skb->sk = sk;
+ skb->destructor = sock_efree;
+ dst = READ_ONCE(sk->sk_rx_dst);
+
+ if (dst)
+ dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+ if (dst) {
+ if (dst->flags & DST_NOCACHE) {
+ if (likely(atomic_inc_not_zero(&dst->__refcnt)))
+ skb_dst_set(skb, dst);
+ } else {
+ skb_dst_set_noref(skb, dst);
+ }
+ }
+}
+
static __inline__ int udpv6_rcv(struct sk_buff *skb)
{
return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
@@ -1378,7 +1442,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
}
#endif
-static const struct inet6_protocol udpv6_protocol = {
+static struct inet6_protocol udpv6_protocol = {
+ .early_demux = udp_v6_early_demux,
+ .early_demux_handler = udp_v6_early_demux,
.handler = udpv6_rcv,
.err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 4439ee44c8b0..7a92c0f31912 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -13,6 +13,7 @@
#include <net/dst.h>
#include <net/ipv6.h>
#include <net/xfrm.h>
+#include <net/protocol.h>
/* Add encapsulation header.
*
@@ -26,6 +27,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
int hdr_len;
iph = ipv6_hdr(skb);
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
@@ -61,9 +63,41 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
+static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ const struct net_offload *ops;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ skb->transport_header += x->props.header_len;
+ ops = rcu_dereference(inet6_offloads[xo->proto]);
+ if (likely(ops && ops->callbacks.gso_segment))
+ segs = ops->callbacks.gso_segment(skb, features);
+
+ return segs;
+}
+
+static void xfrm6_transport_xmit(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ skb_reset_mac_len(skb);
+ pskb_pull(skb, skb->mac_len + sizeof(struct ipv6hdr) + x->props.header_len);
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ skb_reset_transport_header(skb);
+ skb->transport_header -= x->props.header_len;
+ }
+}
+
+
static struct xfrm_mode xfrm6_transport_mode = {
.input = xfrm6_transport_input,
.output = xfrm6_transport_output,
+ .gso_segment = xfrm4_transport_gso_segment,
+ .xmit = xfrm6_transport_xmit,
.owner = THIS_MODULE,
.encap = XFRM_MODE_TRANSPORT,
};
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 372855eeaf42..02556e356f87 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -36,6 +36,9 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
struct ipv6hdr *top_iph;
int dsfield;
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
offsetof(struct ipv6hdr, nexthdr);
@@ -96,11 +99,35 @@ out:
return err;
}
+static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ __skb_push(skb, skb->mac_len);
+ return skb_mac_gso_segment(skb, features);
+
+}
+
+static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ skb->network_header = skb->network_header - x->props.header_len;
+ skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+ }
+
+ skb_reset_mac_len(skb);
+ pskb_pull(skb, skb->mac_len + x->props.header_len);
+}
+
static struct xfrm_mode xfrm6_tunnel_mode = {
.input2 = xfrm6_mode_tunnel_input,
.input = xfrm_prepare_input,
.output2 = xfrm6_mode_tunnel_output,
.output = xfrm6_prepare_output,
+ .gso_segment = xfrm6_mode_tunnel_gso_segment,
+ .xmit = xfrm6_mode_tunnel_xmit,
.owner = THIS_MODULE,
.encap = XFRM_MODE_TUNNEL,
.flags = XFRM_MODE_FLAG_TUNNEL,
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 4d09ce6fa90e..8ae87d4ec5ff 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -73,11 +73,16 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
int mtu, ret = 0;
struct dst_entry *dst = skb_dst(skb);
+ if (skb->ignore_df)
+ goto out;
+
mtu = dst_mtu(dst);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if (!skb->ignore_df && skb->len > mtu) {
+ if ((!skb_is_gso(skb) && skb->len > mtu) ||
+ (skb_is_gso(skb) &&
+ skb_gso_network_seglen(skb) > ip6_skb_dst_mtu(skb))) {
skb->dev = dst->dev;
skb->protocol = htons(ETH_P_IPV6);
@@ -89,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
ret = -EMSGSIZE;
}
-
+out:
return ret;
}
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 31762f76cdb5..deca20fb2ce2 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1707,11 +1707,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct kcm_clone info;
struct socket *newsock = NULL;
- if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
- return -EFAULT;
-
err = kcm_clone(sock, &info, &newsock);
-
if (!err) {
if (copy_to_user((void __user *)arg, &info,
sizeof(info))) {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index be8cecc65002..c1950bb14735 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3827,7 +3827,6 @@ static inline void pfkey_exit_proc(struct net *net)
static struct xfrm_mgr pfkeyv2_mgr =
{
- .id = "pfkeyv2",
.notify = pfkey_send_notify,
.acquire = pfkey_send_acquire,
.compile_policy = pfkey_compile_policy,
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index e37d9554da7b..fa0342574b89 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -120,7 +120,7 @@ static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
return sk->sk_user_data;
}
-static inline struct l2tp_net *l2tp_pernet(struct net *net)
+static inline struct l2tp_net *l2tp_pernet(const struct net *net)
{
BUG_ON(!net);
@@ -217,27 +217,6 @@ static void l2tp_tunnel_sock_put(struct sock *sk)
sock_put(sk);
}
-/* Lookup a session by id in the global session list
- */
-static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id)
-{
- struct l2tp_net *pn = l2tp_pernet(net);
- struct hlist_head *session_list =
- l2tp_session_id_hash_2(pn, session_id);
- struct l2tp_session *session;
-
- rcu_read_lock_bh();
- hlist_for_each_entry_rcu(session, session_list, global_hlist) {
- if (session->session_id == session_id) {
- rcu_read_unlock_bh();
- return session;
- }
- }
- rcu_read_unlock_bh();
-
- return NULL;
-}
-
/* Session hash list.
* The session_id SHOULD be random according to RFC2661, but several
* L2TP implementations (Cisco and Microsoft) use incrementing
@@ -250,38 +229,10 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
}
-/* Lookup a session by id
- */
-struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id)
-{
- struct hlist_head *session_list;
- struct l2tp_session *session;
-
- /* In L2TPv3, session_ids are unique over all tunnels and we
- * sometimes need to look them up before we know the
- * tunnel.
- */
- if (tunnel == NULL)
- return l2tp_session_find_2(net, session_id);
-
- session_list = l2tp_session_id_hash(tunnel, session_id);
- read_lock_bh(&tunnel->hlist_lock);
- hlist_for_each_entry(session, session_list, hlist) {
- if (session->session_id == session_id) {
- read_unlock_bh(&tunnel->hlist_lock);
- return session;
- }
- }
- read_unlock_bh(&tunnel->hlist_lock);
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(l2tp_session_find);
-
-/* Like l2tp_session_find() but takes a reference on the returned session.
+/* Lookup a session. A new reference is held on the returned session.
* Optionally calls session->ref() too if do_ref is true.
*/
-struct l2tp_session *l2tp_session_get(struct net *net,
+struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_tunnel *tunnel,
u32 session_id, bool do_ref)
{
@@ -356,7 +307,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
/* Lookup a session by interface name.
* This is very inefficient but is only used by management interfaces.
*/
-struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
+struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
+ const char *ifname,
bool do_ref)
{
struct l2tp_net *pn = l2tp_pernet(net);
@@ -427,7 +379,7 @@ exist:
/* Lookup a tunnel by id
*/
-struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
+struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id)
{
struct l2tp_tunnel *tunnel;
struct l2tp_net *pn = l2tp_pernet(net);
@@ -445,7 +397,7 @@ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_find);
-struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth)
+struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth)
{
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 8ce7818c7a9d..eec5ad2ebb93 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -230,18 +230,16 @@ out:
return tunnel;
}
-struct l2tp_session *l2tp_session_get(struct net *net,
+struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_tunnel *tunnel,
u32 session_id, bool do_ref);
-struct l2tp_session *l2tp_session_find(struct net *net,
- struct l2tp_tunnel *tunnel,
- u32 session_id);
struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
bool do_ref);
-struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
+struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
+ const char *ifname,
bool do_ref);
-struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
-struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
+struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id);
+struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth);
int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 6fd41d7afe1e..59aba8aeac03 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -30,6 +30,9 @@
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
#include "l2tp_core.h"
@@ -127,8 +130,13 @@ static const struct net_device_ops l2tp_eth_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
};
+static struct device_type l2tpeth_type = {
+ .name = "l2tpeth",
+};
+
static void l2tp_eth_dev_setup(struct net_device *dev)
{
+ SET_NETDEV_DEVTYPE(dev, &l2tpeth_type);
ether_setup(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->features |= NETIF_F_LLTX;
@@ -204,8 +212,58 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
}
#endif
+static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
+ struct l2tp_session *session,
+ struct net_device *dev)
+{
+ unsigned int overhead = 0;
+ struct dst_entry *dst;
+ u32 l3_overhead = 0;
+
+ /* if the encap is UDP, account for UDP header size */
+ if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+ overhead += sizeof(struct udphdr);
+ dev->needed_headroom += sizeof(struct udphdr);
+ }
+ if (session->mtu != 0) {
+ dev->mtu = session->mtu;
+ dev->needed_headroom += session->hdr_len;
+ return;
+ }
+ lock_sock(tunnel->sock);
+ l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
+ release_sock(tunnel->sock);
+ if (l3_overhead == 0) {
+ /* L3 Overhead couldn't be identified, this could be
+ * because tunnel->sock was NULL or the socket's
+ * address family was not IPv4 or IPv6,
+ * dev mtu stays at 1500.
+ */
+ return;
+ }
+ /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr
+ * UDP overhead, if any, was already factored in above.
+ */
+ overhead += session->hdr_len + ETH_HLEN + l3_overhead;
+
+ /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
+ dst = sk_dst_get(tunnel->sock);
+ if (dst) {
+ /* dst_mtu will use PMTU if found, else fallback to intf MTU */
+ u32 pmtu = dst_mtu(dst);
+
+ if (pmtu != 0)
+ dev->mtu = pmtu;
+ dst_release(dst);
+ }
+ session->mtu = dev->mtu - overhead;
+ dev->mtu = session->mtu;
+ dev->needed_headroom += session->hdr_len;
+}
+
static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
+ unsigned char name_assign_type;
struct net_device *dev;
char name[IFNAMSIZ];
struct l2tp_tunnel *tunnel;
@@ -229,8 +287,11 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
goto out;
}
strlcpy(name, cfg->ifname, IFNAMSIZ);
- } else
+ name_assign_type = NET_NAME_USER;
+ } else {
strcpy(name, L2TP_ETH_DEV_NAME);
+ name_assign_type = NET_NAME_ENUM;
+ }
session = l2tp_session_create(sizeof(*spriv), tunnel, session_id,
peer_session_id, cfg);
@@ -239,7 +300,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
goto out;
}
- dev = alloc_netdev(sizeof(*priv), name, NET_NAME_UNKNOWN,
+ dev = alloc_netdev(sizeof(*priv), name, name_assign_type,
l2tp_eth_dev_setup);
if (!dev) {
rc = -ENOMEM;
@@ -247,12 +308,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
}
dev_net_set(dev, net);
- if (session->mtu == 0)
- session->mtu = dev->mtu - session->hdr_len;
- dev->mtu = session->mtu;
- dev->needed_headroom += session->hdr_len;
dev->min_mtu = 0;
dev->max_mtu = ETH_MAX_MTU;
+ l2tp_eth_adjust_mtu(tunnel, session, dev);
priv = netdev_priv(dev);
priv->dev = dev;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 7e3e669baac4..12cfcd0ca807 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -521,11 +521,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
goto out;
}
session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
- session = l2tp_session_find(net, tunnel, session_id);
- if (session) {
- ret = -EEXIST;
- goto out;
- }
if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) {
ret = -EINVAL;
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 4456559cb056..1b7a4daf283c 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -357,14 +357,14 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
spin_lock_init(&tid_agg_rx->reorder_lock);
/* rx timer */
- tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired;
- tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid];
- init_timer_deferrable(&tid_agg_rx->session_timer);
+ setup_deferrable_timer(&tid_agg_rx->session_timer,
+ sta_rx_agg_session_timer_expired,
+ (unsigned long)&sta->timer_to_tid[tid]);
/* rx reorder timer */
- tid_agg_rx->reorder_timer.function = sta_rx_agg_reorder_timer_expired;
- tid_agg_rx->reorder_timer.data = (unsigned long)&sta->timer_to_tid[tid];
- init_timer(&tid_agg_rx->reorder_timer);
+ setup_timer(&tid_agg_rx->reorder_timer,
+ sta_rx_agg_reorder_timer_expired,
+ (unsigned long)&sta->timer_to_tid[tid]);
/* prepare reordering buffer */
tid_agg_rx->reorder_buf =
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 45319cc01121..60e2a62f7bef 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -670,14 +670,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
tid_tx->timeout = timeout;
/* response timer */
- tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired;
- tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid];
- init_timer(&tid_tx->addba_resp_timer);
+ setup_timer(&tid_tx->addba_resp_timer,
+ sta_addba_resp_timer_expired,
+ (unsigned long)&sta->timer_to_tid[tid]);
/* tx timer */
- tid_tx->session_timer.function = sta_tx_agg_session_timer_expired;
- tid_tx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid];
- init_timer_deferrable(&tid_tx->session_timer);
+ setup_deferrable_timer(&tid_tx->session_timer,
+ sta_tx_agg_session_timer_expired,
+ (unsigned long)&sta->timer_to_tid[tid]);
/* assign a dialog token */
sta->ampdu_mlme.dialog_token_allocator++;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ac879bb17870..d041f78ecee6 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3,7 +3,7 @@
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
- * Copyright (C) 2015-2016 Intel Deutschland GmbH
+ * Copyright (C) 2015-2017 Intel Deutschland GmbH
*
* This file is GPLv2 as found in COPYING.
*/
@@ -22,11 +22,98 @@
#include "mesh.h"
#include "wme.h"
+static void ieee80211_set_mu_mimo_follow(struct ieee80211_sub_if_data *sdata,
+ struct vif_params *params)
+{
+ bool mu_mimo_groups = false;
+ bool mu_mimo_follow = false;
+
+ if (params->vht_mumimo_groups) {
+ u64 membership;
+
+ BUILD_BUG_ON(sizeof(membership) != WLAN_MEMBERSHIP_LEN);
+
+ memcpy(sdata->vif.bss_conf.mu_group.membership,
+ params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN);
+ memcpy(sdata->vif.bss_conf.mu_group.position,
+ params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN,
+ WLAN_USER_POSITION_LEN);
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_MU_GROUPS);
+ /* don't care about endianness - just check for 0 */
+ memcpy(&membership, params->vht_mumimo_groups,
+ WLAN_MEMBERSHIP_LEN);
+ mu_mimo_groups = membership != 0;
+ }
+
+ if (params->vht_mumimo_follow_addr) {
+ mu_mimo_follow =
+ is_valid_ether_addr(params->vht_mumimo_follow_addr);
+ ether_addr_copy(sdata->u.mntr.mu_follow_addr,
+ params->vht_mumimo_follow_addr);
+ }
+
+ sdata->vif.mu_mimo_owner = mu_mimo_groups || mu_mimo_follow;
+}
+
+static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
+ struct vif_params *params)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_sub_if_data *monitor_sdata;
+
+ /* check flags first */
+ if (params->flags && ieee80211_sdata_running(sdata)) {
+ u32 mask = MONITOR_FLAG_COOK_FRAMES | MONITOR_FLAG_ACTIVE;
+
+ /*
+ * Prohibit MONITOR_FLAG_COOK_FRAMES and
+ * MONITOR_FLAG_ACTIVE to be changed while the
+ * interface is up.
+ * Else we would need to add a lot of cruft
+ * to update everything:
+ * cooked_mntrs, monitor and all fif_* counters
+ * reconfigure hardware
+ */
+ if ((params->flags & mask) != (sdata->u.mntr.flags & mask))
+ return -EBUSY;
+ }
+
+ /* also validate MU-MIMO change */
+ monitor_sdata = rtnl_dereference(local->monitor_sdata);
+
+ if (!monitor_sdata &&
+ (params->vht_mumimo_groups || params->vht_mumimo_follow_addr))
+ return -EOPNOTSUPP;
+
+ /* apply all changes now - no failures allowed */
+
+ if (monitor_sdata)
+ ieee80211_set_mu_mimo_follow(monitor_sdata, params);
+
+ if (params->flags) {
+ if (ieee80211_sdata_running(sdata)) {
+ ieee80211_adjust_monitor_flags(sdata, -1);
+ sdata->u.mntr.flags = params->flags;
+ ieee80211_adjust_monitor_flags(sdata, 1);
+
+ ieee80211_configure_filter(local);
+ } else {
+ /*
+ * Because the interface is down, ieee80211_do_stop
+ * and ieee80211_do_open take care of "everything"
+ * mentioned in the comment above.
+ */
+ sdata->u.mntr.flags = params->flags;
+ }
+ }
+
+ return 0;
+}
+
static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy,
const char *name,
unsigned char name_assign_type,
enum nl80211_iftype type,
- u32 *flags,
struct vif_params *params)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
@@ -38,9 +125,14 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy,
if (err)
return ERR_PTR(err);
- if (type == NL80211_IFTYPE_MONITOR && flags) {
- sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
- sdata->u.mntr.flags = *flags;
+ sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+
+ if (type == NL80211_IFTYPE_MONITOR) {
+ err = ieee80211_set_mon_options(sdata, params);
+ if (err) {
+ ieee80211_if_remove(sdata);
+ return NULL;
+ }
}
return wdev;
@@ -55,7 +147,7 @@ static int ieee80211_del_iface(struct wiphy *wiphy, struct wireless_dev *wdev)
static int ieee80211_change_iface(struct wiphy *wiphy,
struct net_device *dev,
- enum nl80211_iftype type, u32 *flags,
+ enum nl80211_iftype type,
struct vif_params *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -75,58 +167,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
}
if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_sub_if_data *monitor_sdata;
- u32 mu_mntr_cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
-
- monitor_sdata = rtnl_dereference(local->monitor_sdata);
- if (monitor_sdata &&
- wiphy_ext_feature_isset(wiphy, mu_mntr_cap_flag)) {
- memcpy(monitor_sdata->vif.bss_conf.mu_group.membership,
- params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN);
- memcpy(monitor_sdata->vif.bss_conf.mu_group.position,
- params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN,
- WLAN_USER_POSITION_LEN);
- monitor_sdata->vif.mu_mimo_owner = true;
- ieee80211_bss_info_change_notify(monitor_sdata,
- BSS_CHANGED_MU_GROUPS);
-
- ether_addr_copy(monitor_sdata->u.mntr.mu_follow_addr,
- params->macaddr);
- }
-
- if (!flags)
- return 0;
-
- if (ieee80211_sdata_running(sdata)) {
- u32 mask = MONITOR_FLAG_COOK_FRAMES |
- MONITOR_FLAG_ACTIVE;
-
- /*
- * Prohibit MONITOR_FLAG_COOK_FRAMES and
- * MONITOR_FLAG_ACTIVE to be changed while the
- * interface is up.
- * Else we would need to add a lot of cruft
- * to update everything:
- * cooked_mntrs, monitor and all fif_* counters
- * reconfigure hardware
- */
- if ((*flags & mask) != (sdata->u.mntr.flags & mask))
- return -EBUSY;
-
- ieee80211_adjust_monitor_flags(sdata, -1);
- sdata->u.mntr.flags = *flags;
- ieee80211_adjust_monitor_flags(sdata, 1);
-
- ieee80211_configure_filter(local);
- } else {
- /*
- * Because the interface is down, ieee80211_do_stop
- * and ieee80211_do_open take care of "everything"
- * mentioned in the comment above.
- */
- sdata->u.mntr.flags = *flags;
- }
+ ret = ieee80211_set_mon_options(sdata, params);
+ if (ret)
+ return ret;
}
return 0;
@@ -2042,6 +2085,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
params->basic_rates_len,
&sdata->vif.bss_conf.basic_rates);
changed |= BSS_CHANGED_BASIC_RATES;
+ ieee80211_check_rate_mask(sdata);
}
if (params->ap_isolate >= 0) {
@@ -2630,6 +2674,33 @@ static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy,
bss_conf->cqm_rssi_thold = rssi_thold;
bss_conf->cqm_rssi_hyst = rssi_hyst;
+ bss_conf->cqm_rssi_low = 0;
+ bss_conf->cqm_rssi_high = 0;
+ sdata->u.mgd.last_cqm_event_signal = 0;
+
+ /* tell the driver upon association, unless already associated */
+ if (sdata->u.mgd.associated &&
+ sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_CQM);
+
+ return 0;
+}
+
+static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy,
+ struct net_device *dev,
+ s32 rssi_low, s32 rssi_high)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_vif *vif = &sdata->vif;
+ struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
+
+ if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)
+ return -EOPNOTSUPP;
+
+ bss_conf->cqm_rssi_low = rssi_low;
+ bss_conf->cqm_rssi_high = rssi_high;
+ bss_conf->cqm_rssi_thold = 0;
+ bss_conf->cqm_rssi_hyst = 0;
sdata->u.mgd.last_cqm_event_signal = 0;
/* tell the driver upon association, unless already associated */
@@ -2658,6 +2729,21 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
return ret;
}
+ /*
+ * If active validate the setting and reject it if it doesn't leave
+ * at least one basic rate usable, since we really have to be able
+ * to send something, and if we're an AP we have to be able to do
+ * so at a basic rate so that all clients can receive it.
+ */
+ if (rcu_access_pointer(sdata->vif.chanctx_conf) &&
+ sdata->vif.bss_conf.chandef.chan) {
+ u32 basic_rates = sdata->vif.bss_conf.basic_rates;
+ enum nl80211_band band = sdata->vif.bss_conf.chandef.chan->band;
+
+ if (!(mask->control[band].legacy & basic_rates))
+ return -EINVAL;
+ }
+
for (i = 0; i < NUM_NL80211_BANDS; i++) {
struct ieee80211_supported_band *sband = wiphy->bands[i];
int j;
@@ -3639,6 +3725,7 @@ const struct cfg80211_ops mac80211_config_ops = {
.mgmt_tx = ieee80211_mgmt_tx,
.mgmt_tx_cancel_wait = ieee80211_mgmt_tx_cancel_wait,
.set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
+ .set_cqm_rssi_range_config = ieee80211_set_cqm_rssi_range_config,
.mgmt_frame_register = ieee80211_mgmt_frame_register,
.set_antenna = ieee80211_set_antenna,
.get_antenna = ieee80211_get_antenna,
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 98999d3d5262..e957351976a2 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -425,7 +425,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
cfg80211_chandef_create(&chandef, cbss->channel,
- NL80211_CHAN_WIDTH_20_NOHT);
+ NL80211_CHAN_NO_HT);
chandef.width = sdata->u.ibss.chandef.width;
break;
case NL80211_CHAN_WIDTH_80:
@@ -437,7 +437,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
default:
/* fall back to 20 MHz for unsupported modes */
cfg80211_chandef_create(&chandef, cbss->channel,
- NL80211_CHAN_WIDTH_20_NOHT);
+ NL80211_CHAN_NO_HT);
break;
}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 0e718437d080..cf6d5abb65a3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -839,6 +839,8 @@ struct txq_info {
struct ieee80211_if_mntr {
u32 flags;
u8 mu_follow_addr[ETH_ALEN] __aligned(2);
+
+ struct list_head list;
};
/**
@@ -1259,6 +1261,7 @@ struct ieee80211_local {
/* see iface.c */
struct list_head interfaces;
+ struct list_head mon_list; /* only that are IFF_UP && !cooked */
struct mutex iflist_mtx;
/*
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 5bb0c5012819..3bd5b81f5d81 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -676,7 +676,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
set_bit(SDATA_STATE_RUNNING, &sdata->state);
- if (sdata->vif.type == NL80211_IFTYPE_WDS) {
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_WDS:
/* Create STA entry for the WDS peer */
sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr,
GFP_KERNEL);
@@ -697,8 +698,17 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
rate_control_rate_init(sta);
netif_carrier_on(dev);
- } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) {
+ break;
+ case NL80211_IFTYPE_P2P_DEVICE:
rcu_assign_pointer(local->p2p_sdata, sdata);
+ break;
+ case NL80211_IFTYPE_MONITOR:
+ if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES)
+ break;
+ list_add_tail_rcu(&sdata->u.mntr.list, &local->mon_list);
+ break;
+ default:
+ break;
}
/*
@@ -817,6 +827,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_AP:
cancel_work_sync(&sdata->u.ap.request_smps_work);
break;
+ case NL80211_IFTYPE_MONITOR:
+ if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES)
+ break;
+ list_del_rcu(&sdata->u.mntr.list);
+ break;
default:
break;
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 56fb47953b72..ae408a96c407 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -603,6 +603,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
ARRAY_SIZE(local->ext_capa);
INIT_LIST_HEAD(&local->interfaces);
+ INIT_LIST_HEAD(&local->mon_list);
__hw_addr_init(&local->mc_list);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 6e7b6a07b7d5..281d834c7548 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1100,8 +1100,14 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
return;
- if (mesh_matches_local(sdata, &elems))
- mesh_neighbour_update(sdata, mgmt->sa, &elems);
+ if (mesh_matches_local(sdata, &elems)) {
+ mpl_dbg(sdata, "rssi_threshold=%d,rx_status->signal=%d\n",
+ sdata->u.mesh.mshcfg.rssi_threshold, rx_status->signal);
+ if (!sdata->u.mesh.user_mpm ||
+ sdata->u.mesh.mshcfg.rssi_threshold == 0 ||
+ sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal)
+ mesh_neighbour_update(sdata, mgmt->sa, &elems);
+ }
if (ifmsh->sync_ops)
ifmsh->sync_ops->rx_bcn_presp(sdata,
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index b747c9645e43..4005edd71fe8 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -16,6 +16,7 @@
#define TEST_FRAME_LEN 8192
#define MAX_METRIC 0xffffffff
#define ARITH_SHIFT 8
+#define LINK_FAIL_THRESH 95
#define MAX_PREQ_QUEUE_LEN 64
@@ -307,10 +308,12 @@ void ieee80211s_update_metric(struct ieee80211_local *local,
failed = !(txinfo->flags & IEEE80211_TX_STAT_ACK);
- /* moving average, scaled to 100 */
- sta->mesh->fail_avg =
- ((80 * sta->mesh->fail_avg + 5) / 100 + 20 * failed);
- if (sta->mesh->fail_avg > 95)
+ /* moving average, scaled to 100.
+ * feed failure as 100 and success as 0
+ */
+ ewma_mesh_fail_avg_add(&sta->mesh->fail_avg, failed * 100);
+ if (ewma_mesh_fail_avg_read(&sta->mesh->fail_avg) >
+ LINK_FAIL_THRESH)
mesh_plink_broken(sta);
}
@@ -325,6 +328,8 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
int rate, err;
u32 tx_time, estimated_retx;
u64 result;
+ unsigned long fail_avg =
+ ewma_mesh_fail_avg_read(&sta->mesh->fail_avg);
/* Try to get rate based on HW/SW RC algorithm.
* Rate is returned in units of Kbps, correct this
@@ -336,7 +341,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
if (rate) {
err = 0;
} else {
- if (sta->mesh->fail_avg >= 100)
+ if (fail_avg > LINK_FAIL_THRESH)
return MAX_METRIC;
sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
@@ -344,7 +349,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
if (WARN_ON(!rate))
return MAX_METRIC;
- err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+ err = (fail_avg << ARITH_SHIFT) / 100;
}
/* bitrate is in units of 100 Kbps, while we need rate in units of
@@ -484,6 +489,9 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
? mpath->exp_time : exp_time;
mesh_path_activate(mpath);
spin_unlock_bh(&mpath->state_lock);
+ ewma_mesh_fail_avg_init(&sta->mesh->fail_avg);
+ /* init it at a low value - 0 start is tricky */
+ ewma_mesh_fail_avg_add(&sta->mesh->fail_avg, 1);
mesh_path_tx_pending(mpath);
/* draft says preq_id should be saved to, but there does
* not seem to be any use for it, skipping by now
@@ -522,6 +530,9 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
? mpath->exp_time : exp_time;
mesh_path_activate(mpath);
spin_unlock_bh(&mpath->state_lock);
+ ewma_mesh_fail_avg_init(&sta->mesh->fail_avg);
+ /* init it at a low value - 0 start is tricky */
+ ewma_mesh_fail_avg_add(&sta->mesh->fail_avg, 1);
mesh_path_tx_pending(mpath);
} else
spin_unlock_bh(&mpath->state_lock);
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index f0e6175a9821..97269caafecd 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -397,11 +397,10 @@ struct mesh_path *mesh_path_new(struct ieee80211_sub_if_data *sdata,
new_mpath->sdata = sdata;
new_mpath->flags = 0;
skb_queue_head_init(&new_mpath->frame_queue);
- new_mpath->timer.data = (unsigned long) new_mpath;
- new_mpath->timer.function = mesh_path_timer;
new_mpath->exp_time = jiffies;
spin_lock_init(&new_mpath->state_lock);
- init_timer(&new_mpath->timer);
+ setup_timer(&new_mpath->timer, mesh_path_timer,
+ (unsigned long) new_mpath);
return new_mpath;
}
@@ -829,6 +828,9 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop)
mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID;
mesh_path_activate(mpath);
spin_unlock_bh(&mpath->state_lock);
+ ewma_mesh_fail_avg_init(&next_hop->mesh->fail_avg);
+ /* init it at a low value - 0 start is tricky */
+ ewma_mesh_fail_avg_add(&next_hop->mesh->fail_avg, 1);
mesh_path_tx_pending(mpath);
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 6e90301154d5..24d69bcf71ad 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1908,6 +1908,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
sdata->u.mgd.associated = cbss;
memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN);
+ ieee80211_check_rate_mask(sdata);
+
sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE;
if (sdata->vif.p2p ||
@@ -2797,8 +2799,9 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
- sdata_info(sdata, "disassociated from %pM (Reason: %u)\n",
- mgmt->sa, reason_code);
+ sdata_info(sdata, "disassociated from %pM (Reason: %u=%s)\n",
+ mgmt->sa, reason_code,
+ ieee80211_get_reason_code_string(reason_code));
ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
@@ -2822,15 +2825,15 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
*have_higher_than_11mbit = true;
/*
- * BSS_MEMBERSHIP_SELECTOR_HT_PHY is defined in 802.11n-2009
- * 7.3.2.2 as a magic value instead of a rate. Hence, skip it.
+ * Skip HT and VHT BSS membership selectors since they're not
+ * rates.
*
- * Note: Even through the membership selector and the basic
+ * Note: Even though the membership selector and the basic
* rate flag share the same bit, they are not exactly
* the same.
*/
- if (!!(supp_rates[i] & 0x80) &&
- (supp_rates[i] & 0x7f) == BSS_MEMBERSHIP_SELECTOR_HT_PHY)
+ if (supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HT_PHY) ||
+ supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_VHT_PHY))
continue;
for (j = 0; j < sband->n_bitrates; j++) {
@@ -3430,6 +3433,30 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
}
}
+ if (bss_conf->cqm_rssi_low &&
+ ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) {
+ int sig = -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal);
+ int last_event = ifmgd->last_cqm_event_signal;
+ int low = bss_conf->cqm_rssi_low;
+ int high = bss_conf->cqm_rssi_high;
+
+ if (sig < low &&
+ (last_event == 0 || last_event >= low)) {
+ ifmgd->last_cqm_event_signal = sig;
+ ieee80211_cqm_rssi_notify(
+ &sdata->vif,
+ NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW,
+ sig, GFP_KERNEL);
+ } else if (sig > high &&
+ (last_event == 0 || last_event <= high)) {
+ ifmgd->last_cqm_event_signal = sig;
+ ieee80211_cqm_rssi_notify(
+ &sdata->vif,
+ NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH,
+ sig, GFP_KERNEL);
+ }
+ }
+
if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) {
mlme_dbg_ratelimited(sdata,
"cancelling AP probe due to a received beacon\n");
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 206698bc93f4..9d7a1cd949fb 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -2,6 +2,7 @@
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -173,9 +174,11 @@ ieee80211_rate_control_ops_get(const char *name)
/* try default if specific alg requested but not found */
ops = ieee80211_try_rate_control_ops_get(ieee80211_default_rc_algo);
- /* try built-in one if specific alg requested but not found */
- if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT))
+ /* Note: check for > 0 is intentional to avoid clang warning */
+ if (!ops && (strlen(CONFIG_MAC80211_RC_DEFAULT) > 0))
+ /* try built-in one if specific alg requested but not found */
ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT);
+
kernel_param_unlock(THIS_MODULE);
return ops;
@@ -208,7 +211,6 @@ static struct rate_control_ref *rate_control_alloc(const char *name,
ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL);
if (!ref)
return NULL;
- ref->local = local;
ref->ops = ieee80211_rate_control_ops_get(name);
if (!ref->ops)
goto free;
@@ -229,18 +231,45 @@ free:
return NULL;
}
-static void rate_control_free(struct rate_control_ref *ctrl_ref)
+static void rate_control_free(struct ieee80211_local *local,
+ struct rate_control_ref *ctrl_ref)
{
ctrl_ref->ops->free(ctrl_ref->priv);
#ifdef CONFIG_MAC80211_DEBUGFS
- debugfs_remove_recursive(ctrl_ref->local->debugfs.rcdir);
- ctrl_ref->local->debugfs.rcdir = NULL;
+ debugfs_remove_recursive(local->debugfs.rcdir);
+ local->debugfs.rcdir = NULL;
#endif
kfree(ctrl_ref);
}
+void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_supported_band *sband;
+ u32 user_mask, basic_rates = sdata->vif.bss_conf.basic_rates;
+ enum nl80211_band band;
+
+ if (WARN_ON(!sdata->vif.bss_conf.chandef.chan))
+ return;
+
+ if (WARN_ON_ONCE(!basic_rates))
+ return;
+
+ band = sdata->vif.bss_conf.chandef.chan->band;
+ user_mask = sdata->rc_rateidx_mask[band];
+ sband = local->hw.wiphy->bands[band];
+
+ if (user_mask & basic_rates)
+ return;
+
+ sdata_dbg(sdata,
+ "no overlap between basic rates (0x%x) and user mask (0x%x on band %d) - clearing the latter",
+ basic_rates, user_mask, band);
+ sdata->rc_rateidx_mask[band] = (1 << sband->n_bitrates) - 1;
+}
+
static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc)
{
struct sk_buff *skb = txrc->skb;
@@ -936,6 +965,6 @@ void rate_control_deinitialize(struct ieee80211_local *local)
return;
local->rate_ctrl = NULL;
- rate_control_free(ref);
+ rate_control_free(local, ref);
}
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 8d3260785b94..f7825ef5f871 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -20,7 +20,6 @@
#include "driver-ops.h"
struct rate_control_ref {
- struct ieee80211_local *local;
const struct rate_control_ops *ops;
void *priv;
};
@@ -111,6 +110,8 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta)
#endif
}
+void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata);
+
/* Get a reference to the rate control algorithm. If `name' is NULL, get the
* first available algorithm. */
int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4d7543d1a62c..2142074d9fb0 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -95,24 +95,13 @@ static u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
* This function cleans up the SKB, i.e. it removes all the stuff
* only useful for monitoring.
*/
-static struct sk_buff *remove_monitor_info(struct ieee80211_local *local,
- struct sk_buff *skb,
- unsigned int rtap_vendor_space)
+static void remove_monitor_info(struct sk_buff *skb,
+ unsigned int present_fcs_len,
+ unsigned int rtap_vendor_space)
{
- if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) {
- if (likely(skb->len > FCS_LEN))
- __pskb_trim(skb, skb->len - FCS_LEN);
- else {
- /* driver bug */
- WARN_ON(1);
- dev_kfree_skb(skb);
- return NULL;
- }
- }
-
+ if (present_fcs_len)
+ __pskb_trim(skb, skb->len - present_fcs_len);
__pskb_pull(skb, rtap_vendor_space);
-
- return skb;
}
static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len,
@@ -578,8 +567,15 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
* the SKB because it has a bad FCS/PLCP checksum.
*/
- if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS))
+ if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) {
+ if (unlikely(origskb->len <= FCS_LEN)) {
+ /* driver bug */
+ WARN_ON(1);
+ dev_kfree_skb(origskb);
+ return NULL;
+ }
present_fcs_len = FCS_LEN;
+ }
/* ensure hdr->frame_control and vendor radiotap data are in skb head */
if (!pskb_may_pull(origskb, 2 + rtap_vendor_space)) {
@@ -594,7 +590,9 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
return NULL;
}
- return remove_monitor_info(local, origskb, rtap_vendor_space);
+ remove_monitor_info(origskb, present_fcs_len,
+ rtap_vendor_space);
+ return origskb;
}
ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_vendor_space);
@@ -626,9 +624,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
* and FCS from the original.
*/
skb = skb_copy_expand(origskb, needed_headroom, 0, GFP_ATOMIC);
-
- origskb = remove_monitor_info(local, origskb,
- rtap_vendor_space);
+ remove_monitor_info(origskb, present_fcs_len,
+ rtap_vendor_space);
if (!skb)
return origskb;
@@ -642,16 +639,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
skb->pkt_type = PACKET_OTHERHOST;
skb->protocol = htons(ETH_P_802_2);
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
- continue;
-
- if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES)
- continue;
-
- if (!ieee80211_sdata_running(sdata))
- continue;
-
+ list_for_each_entry_rcu(sdata, &local->mon_list, u.mntr.list) {
if (prev_dev) {
skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2) {
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 97f4c9d6b54c..0782e486fe89 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -132,9 +132,9 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct ieee80211_vht_operation vht_oper = {
.chan_width =
wide_bw_chansw_ie->new_channel_width,
- .center_freq_seg1_idx =
+ .center_freq_seg0_idx =
wide_bw_chansw_ie->new_center_freq_seg0,
- .center_freq_seg2_idx =
+ .center_freq_seg1_idx =
wide_bw_chansw_ie->new_center_freq_seg1,
/* .basic_mcs_set doesn't matter */
};
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 3323a2fb289b..81ec1f72518d 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1960,14 +1960,17 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
rinfo->bw = (rate & STA_STATS_RATE_BW_MASK) >>
STA_STATS_RATE_BW_SHIFT;
- if (rate & STA_STATS_RATE_VHT) {
+ switch (rate & STA_STATS_RATE_TYPE_MASK) {
+ case STA_STATS_RATE_TYPE_VHT:
rinfo->flags = RATE_INFO_FLAGS_VHT_MCS;
rinfo->mcs = rate & 0xf;
rinfo->nss = (rate & 0xf0) >> 4;
- } else if (rate & STA_STATS_RATE_HT) {
+ break;
+ case STA_STATS_RATE_TYPE_HT:
rinfo->flags = RATE_INFO_FLAGS_MCS;
rinfo->mcs = rate & 0xff;
- } else if (rate & STA_STATS_RATE_LEGACY) {
+ break;
+ case STA_STATS_RATE_TYPE_LEGACY: {
struct ieee80211_supported_band *sband;
u16 brate;
unsigned int shift;
@@ -1982,6 +1985,8 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
else
shift = 0;
rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
+ break;
+ }
}
if (rate & STA_STATS_RATE_SGI)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index e65cda34d2bc..8949266d7bc3 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -324,6 +324,9 @@ struct ieee80211_fast_rx {
struct rcu_head rcu_head;
};
+/* we use only values in the range 0-100, so pick a large precision */
+DECLARE_EWMA(mesh_fail_avg, 20, 8)
+
/**
* struct mesh_sta - mesh STA information
* @plink_lock: serialize access to plink fields
@@ -369,7 +372,7 @@ struct mesh_sta {
enum nl80211_mesh_power_mode nonpeer_pm;
/* moving percentage of failed MSDUs */
- unsigned int fail_avg;
+ struct ewma_mesh_fail_avg fail_avg;
};
DECLARE_EWMA(signal, 10, 8)
@@ -725,9 +728,10 @@ void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta);
unsigned long ieee80211_sta_last_active(struct sta_info *sta);
#define STA_STATS_RATE_INVALID 0
-#define STA_STATS_RATE_VHT 0x8000
-#define STA_STATS_RATE_HT 0x4000
-#define STA_STATS_RATE_LEGACY 0x2000
+#define STA_STATS_RATE_TYPE_MASK 0xC000
+#define STA_STATS_RATE_TYPE_LEGACY 0x4000
+#define STA_STATS_RATE_TYPE_HT 0x8000
+#define STA_STATS_RATE_TYPE_VHT 0xC000
#define STA_STATS_RATE_SGI 0x1000
#define STA_STATS_RATE_BW_SHIFT 9
#define STA_STATS_RATE_BW_MASK (0x7 << STA_STATS_RATE_BW_SHIFT)
@@ -753,11 +757,11 @@ static inline u16 sta_stats_encode_rate(struct ieee80211_rx_status *s)
r |= STA_STATS_RATE_SGI;
if (s->flag & RX_FLAG_VHT)
- r |= STA_STATS_RATE_VHT | (s->vht_nss << 4);
+ r |= STA_STATS_RATE_TYPE_VHT | (s->vht_nss << 4);
else if (s->flag & RX_FLAG_HT)
- r |= STA_STATS_RATE_HT;
+ r |= STA_STATS_RATE_TYPE_HT;
else
- r |= STA_STATS_RATE_LEGACY | (s->band << 4);
+ r |= STA_STATS_RATE_TYPE_LEGACY | (s->band << 4);
return r;
}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index ba8d7db0a071..f27719eeeed7 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -682,10 +682,6 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
txrc.skb = tx->skb;
txrc.reported_rate.idx = -1;
txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band];
- if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1)
- txrc.max_rate_idx = -1;
- else
- txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
if (tx->sdata->rc_has_mcs_mask[info->band])
txrc.rate_idx_mcs_mask =
@@ -4249,10 +4245,6 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
txrc.skb = skb;
txrc.reported_rate.idx = -1;
txrc.rate_idx_mask = sdata->rc_rateidx_mask[band];
- if (txrc.rate_idx_mask == (1 << txrc.sband->n_bitrates) - 1)
- txrc.max_rate_idx = -1;
- else
- txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
txrc.bss = true;
rate_control_get_rate(sdata, NULL, &txrc);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ac59fbd280df..7a37ce78bb38 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2413,13 +2413,13 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
*pos++ = WLAN_EID_VHT_OPERATION;
*pos++ = sizeof(struct ieee80211_vht_operation);
vht_oper = (struct ieee80211_vht_operation *)pos;
- vht_oper->center_freq_seg1_idx = ieee80211_frequency_to_channel(
+ vht_oper->center_freq_seg0_idx = ieee80211_frequency_to_channel(
chandef->center_freq1);
if (chandef->center_freq2)
- vht_oper->center_freq_seg2_idx =
+ vht_oper->center_freq_seg1_idx =
ieee80211_frequency_to_channel(chandef->center_freq2);
else
- vht_oper->center_freq_seg2_idx = 0x00;
+ vht_oper->center_freq_seg1_idx = 0x00;
switch (chandef->width) {
case NL80211_CHAN_WIDTH_160:
@@ -2428,11 +2428,11 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
* workaround.
*/
vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
- vht_oper->center_freq_seg2_idx = vht_oper->center_freq_seg1_idx;
+ vht_oper->center_freq_seg1_idx = vht_oper->center_freq_seg0_idx;
if (chandef->chan->center_freq < chandef->center_freq1)
- vht_oper->center_freq_seg1_idx -= 8;
+ vht_oper->center_freq_seg0_idx -= 8;
else
- vht_oper->center_freq_seg1_idx += 8;
+ vht_oper->center_freq_seg0_idx += 8;
break;
case NL80211_CHAN_WIDTH_80P80:
/*
@@ -2491,9 +2491,9 @@ bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper,
if (!oper)
return false;
- cf1 = ieee80211_channel_to_frequency(oper->center_freq_seg1_idx,
+ cf1 = ieee80211_channel_to_frequency(oper->center_freq_seg0_idx,
chandef->chan->band);
- cf2 = ieee80211_channel_to_frequency(oper->center_freq_seg2_idx,
+ cf2 = ieee80211_channel_to_frequency(oper->center_freq_seg1_idx,
chandef->chan->band);
switch (oper->chan_width) {
@@ -2503,11 +2503,11 @@ bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper,
new.width = NL80211_CHAN_WIDTH_80;
new.center_freq1 = cf1;
/* If needed, adjust based on the newer interop workaround. */
- if (oper->center_freq_seg2_idx) {
+ if (oper->center_freq_seg1_idx) {
unsigned int diff;
- diff = abs(oper->center_freq_seg2_idx -
- oper->center_freq_seg1_idx);
+ diff = abs(oper->center_freq_seg1_idx -
+ oper->center_freq_seg0_idx);
if (diff == 8) {
new.width = NL80211_CHAN_WIDTH_160;
new.center_freq1 = cf2;
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index 56ccffa3f2bf..62141dcec2d6 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -19,6 +19,7 @@
#ifndef __IEEE802154_I_H
#define __IEEE802154_I_H
+#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <net/cfg802154.h>
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 6414079aa729..088e2b459d0f 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -24,6 +24,9 @@
#include <net/nexthop.h>
#include "internal.h"
+/* max memory we will use for mpls_route */
+#define MAX_MPLS_ROUTE_MEM 4096
+
/* Maximum number of labels to look ahead at when selecting a path of
* a multipath route
*/
@@ -32,7 +35,9 @@
#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
static int zero = 0;
+static int one = 1;
static int label_limit = (1 << 20) - 1;
+static int ttl_max = 255;
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
struct nlmsghdr *nlh, struct net *net, u32 portid,
@@ -58,10 +63,7 @@ EXPORT_SYMBOL_GPL(mpls_output_possible);
static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh)
{
- u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN);
- int nh_index = nh - rt->rt_nh;
-
- return nh0_via + rt->rt_max_alen * nh_index;
+ return (u8 *)nh + rt->rt_via_offset;
}
static const u8 *mpls_nh_via(const struct mpls_route *rt,
@@ -187,21 +189,32 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
return hash;
}
+static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index)
+{
+ return (struct mpls_nh *)((u8 *)rt->rt_nh + index * rt->rt_nh_size);
+}
+
+/* number of alive nexthops (rt->rt_nhn_alive) and the flags for
+ * a next hop (nh->nh_flags) are modified by netdev event handlers.
+ * Since those fields can change at any moment, use READ_ONCE to
+ * access both.
+ */
static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
struct sk_buff *skb)
{
- int alive = ACCESS_ONCE(rt->rt_nhn_alive);
u32 hash = 0;
int nh_index = 0;
int n = 0;
+ u8 alive;
/* No need to look further into packet if there's only
* one path
*/
if (rt->rt_nhn == 1)
- goto out;
+ return rt->rt_nh;
- if (alive <= 0)
+ alive = READ_ONCE(rt->rt_nhn_alive);
+ if (alive == 0)
return NULL;
hash = mpls_multipath_hash(rt, skb);
@@ -209,7 +222,9 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
if (alive == rt->rt_nhn)
goto out;
for_nexthops(rt) {
- if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+ unsigned int nh_flags = READ_ONCE(nh->nh_flags);
+
+ if (nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
continue;
if (n == nh_index)
return nh;
@@ -217,11 +232,11 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
} endfor_nexthops(rt);
out:
- return &rt->rt_nh[nh_index];
+ return mpls_get_nexthop(rt, nh_index);
}
-static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
- struct mpls_entry_decoded dec)
+static bool mpls_egress(struct net *net, struct mpls_route *rt,
+ struct sk_buff *skb, struct mpls_entry_decoded dec)
{
enum mpls_payload_type payload_type;
bool success = false;
@@ -246,22 +261,46 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
switch (payload_type) {
case MPT_IPV4: {
struct iphdr *hdr4 = ip_hdr(skb);
+ u8 new_ttl;
skb->protocol = htons(ETH_P_IP);
+
+ /* If propagating TTL, take the decremented TTL from
+ * the incoming MPLS header, otherwise decrement the
+ * TTL, but only if not 0 to avoid underflow.
+ */
+ if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+ (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ net->mpls.ip_ttl_propagate))
+ new_ttl = dec.ttl;
+ else
+ new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0;
+
csum_replace2(&hdr4->check,
htons(hdr4->ttl << 8),
- htons(dec.ttl << 8));
- hdr4->ttl = dec.ttl;
+ htons(new_ttl << 8));
+ hdr4->ttl = new_ttl;
success = true;
break;
}
case MPT_IPV6: {
struct ipv6hdr *hdr6 = ipv6_hdr(skb);
skb->protocol = htons(ETH_P_IPV6);
- hdr6->hop_limit = dec.ttl;
+
+ /* If propagating TTL, take the decremented TTL from
+ * the incoming MPLS header, otherwise decrement the
+ * hop limit, but only if not 0 to avoid underflow.
+ */
+ if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+ (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ net->mpls.ip_ttl_propagate))
+ hdr6->hop_limit = dec.ttl;
+ else if (hdr6->hop_limit)
+ hdr6->hop_limit = hdr6->hop_limit - 1;
success = true;
break;
}
case MPT_UNSPEC:
+ /* Should have decided which protocol it is by now */
break;
}
@@ -361,7 +400,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!new_header_size && dec.bos)) {
/* Penultimate hop popping */
- if (!mpls_egress(rt, skb, dec))
+ if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
goto err;
} else {
bool bos;
@@ -412,6 +451,7 @@ static struct packet_type mpls_packet_type __read_mostly = {
static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
[RTA_DST] = { .type = NLA_U32 },
[RTA_OIF] = { .type = NLA_U32 },
+ [RTA_TTL_PROPAGATE] = { .type = NLA_U8 },
};
struct mpls_route_config {
@@ -421,6 +461,7 @@ struct mpls_route_config {
u8 rc_via_alen;
u8 rc_via[MAX_VIA_ALEN];
u32 rc_label;
+ u8 rc_ttl_propagate;
u8 rc_output_labels;
u32 rc_output_label[MAX_NEW_LABELS];
u32 rc_nlflags;
@@ -430,20 +471,27 @@ struct mpls_route_config {
int rc_mp_len;
};
-static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen)
+/* all nexthops within a route have the same size based on max
+ * number of labels and max via length for a hop
+ */
+static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels)
{
- u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN);
+ u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen);
struct mpls_route *rt;
+ size_t size;
- rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh),
- VIA_ALEN_ALIGN) +
- num_nh * max_alen_aligned,
- GFP_KERNEL);
- if (rt) {
- rt->rt_nhn = num_nh;
- rt->rt_nhn_alive = num_nh;
- rt->rt_max_alen = max_alen_aligned;
- }
+ size = sizeof(*rt) + num_nh * nh_size;
+ if (size > MAX_MPLS_ROUTE_MEM)
+ return ERR_PTR(-EINVAL);
+
+ rt = kzalloc(size, GFP_KERNEL);
+ if (!rt)
+ return ERR_PTR(-ENOMEM);
+
+ rt->rt_nhn = num_nh;
+ rt->rt_nhn_alive = num_nh;
+ rt->rt_nh_size = nh_size;
+ rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels);
return rt;
}
@@ -648,9 +696,6 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
return -ENOMEM;
err = -EINVAL;
- /* Ensure only a supported number of labels are present */
- if (cfg->rc_output_labels > MAX_NEW_LABELS)
- goto errout;
nh->nh_labels = cfg->rc_output_labels;
for (i = 0; i < nh->nh_labels; i++)
@@ -675,7 +720,7 @@ errout:
static int mpls_nh_build(struct net *net, struct mpls_route *rt,
struct mpls_nh *nh, int oif, struct nlattr *via,
- struct nlattr *newdst)
+ struct nlattr *newdst, u8 max_labels)
{
int err = -ENOMEM;
@@ -683,7 +728,7 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt,
goto errout;
if (newdst) {
- err = nla_get_labels(newdst, MAX_NEW_LABELS,
+ err = nla_get_labels(newdst, max_labels,
&nh->nh_labels, nh->nh_label);
if (err)
goto errout;
@@ -708,22 +753,20 @@ errout:
return err;
}
-static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
- u8 cfg_via_alen, u8 *max_via_alen)
+static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
+ u8 cfg_via_alen, u8 *max_via_alen,
+ u8 *max_labels)
{
- int nhs = 0;
int remaining = len;
-
- if (!rtnh) {
- *max_via_alen = cfg_via_alen;
- return 1;
- }
+ u8 nhs = 0;
*max_via_alen = 0;
+ *max_labels = 0;
while (rtnh_ok(rtnh, remaining)) {
struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
int attrlen;
+ u8 n_labels = 0;
attrlen = rtnh_attrlen(rtnh);
nla = nla_find(attrs, attrlen, RTA_VIA);
@@ -737,7 +780,20 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
via_alen);
}
+ nla = nla_find(attrs, attrlen, RTA_NEWDST);
+ if (nla &&
+ nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL) != 0)
+ return 0;
+
+ *max_labels = max_t(u8, *max_labels, n_labels);
+
+ /* number of nexthops is tracked by a u8.
+ * Check for overflow.
+ */
+ if (nhs == 255)
+ return 0;
nhs++;
+
rtnh = rtnh_next(rtnh, &remaining);
}
@@ -746,13 +802,13 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
}
static int mpls_nh_build_multi(struct mpls_route_config *cfg,
- struct mpls_route *rt)
+ struct mpls_route *rt, u8 max_labels)
{
struct rtnexthop *rtnh = cfg->rc_mp;
struct nlattr *nla_via, *nla_newdst;
int remaining = cfg->rc_mp_len;
- int nhs = 0;
int err = 0;
+ u8 nhs = 0;
change_nexthops(rt) {
int attrlen;
@@ -779,7 +835,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg,
}
err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
- rtnh->rtnh_ifindex, nla_via, nla_newdst);
+ rtnh->rtnh_ifindex, nla_via, nla_newdst,
+ max_labels);
if (err)
goto errout;
@@ -806,7 +863,8 @@ static int mpls_route_add(struct mpls_route_config *cfg)
int err = -EINVAL;
u8 max_via_alen;
unsigned index;
- int nhs;
+ u8 max_labels;
+ u8 nhs;
index = cfg->rc_label;
@@ -844,21 +902,32 @@ static int mpls_route_add(struct mpls_route_config *cfg)
goto errout;
err = -EINVAL;
- nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
- cfg->rc_via_alen, &max_via_alen);
+ if (cfg->rc_mp) {
+ nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
+ cfg->rc_via_alen, &max_via_alen,
+ &max_labels);
+ } else {
+ max_via_alen = cfg->rc_via_alen;
+ max_labels = cfg->rc_output_labels;
+ nhs = 1;
+ }
+
if (nhs == 0)
goto errout;
err = -ENOMEM;
- rt = mpls_rt_alloc(nhs, max_via_alen);
- if (!rt)
+ rt = mpls_rt_alloc(nhs, max_via_alen, max_labels);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
goto errout;
+ }
rt->rt_protocol = cfg->rc_protocol;
rt->rt_payload_type = cfg->rc_payload_type;
+ rt->rt_ttl_propagate = cfg->rc_ttl_propagate;
if (cfg->rc_mp)
- err = mpls_nh_build_multi(cfg, rt);
+ err = mpls_nh_build_multi(cfg, rt, max_labels);
else
err = mpls_nh_build_from_cfg(cfg, rt);
if (err)
@@ -1011,8 +1080,8 @@ static int mpls_netconf_msgsize_devconf(int type)
return size;
}
-static void mpls_netconf_notify_devconf(struct net *net, int type,
- struct mpls_dev *mdev)
+static void mpls_netconf_notify_devconf(struct net *net, int event,
+ int type, struct mpls_dev *mdev)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -1021,8 +1090,7 @@ static void mpls_netconf_notify_devconf(struct net *net, int type,
if (!skb)
goto errout;
- err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, RTM_NEWNETCONF,
- 0, type);
+ err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, event, 0, type);
if (err < 0) {
/* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
WARN_ON(err == -EMSGSIZE);
@@ -1042,7 +1110,8 @@ static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = {
};
static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
- struct nlmsghdr *nlh)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[NETCONFA_MAX + 1];
@@ -1054,7 +1123,7 @@ static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
int err;
err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
- devconf_mpls_policy);
+ devconf_mpls_policy, NULL);
if (err < 0)
goto errout;
@@ -1155,9 +1224,8 @@ static int mpls_conf_proc(struct ctl_table *ctl, int write,
if (i == offsetof(struct mpls_dev, input_enabled) &&
val != oval) {
- mpls_netconf_notify_devconf(net,
- NETCONFA_INPUT,
- mdev);
+ mpls_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_INPUT, mdev);
}
}
@@ -1198,10 +1266,11 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
- mdev->sysctl = register_net_sysctl(dev_net(dev), path, table);
+ mdev->sysctl = register_net_sysctl(net, path, table);
if (!mdev->sysctl)
goto free;
+ mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, mdev);
return 0;
free:
@@ -1210,13 +1279,17 @@ out:
return -ENOBUFS;
}
-static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev)
+static void mpls_dev_sysctl_unregister(struct net_device *dev,
+ struct mpls_dev *mdev)
{
+ struct net *net = dev_net(dev);
struct ctl_table *table;
table = mdev->sysctl->ctl_table_arg;
unregister_net_sysctl_table(mdev->sysctl);
kfree(table);
+
+ mpls_netconf_notify_devconf(net, RTM_DELNETCONF, 0, mdev);
}
static struct mpls_dev *mpls_add_dev(struct net_device *dev)
@@ -1242,11 +1315,12 @@ static struct mpls_dev *mpls_add_dev(struct net_device *dev)
u64_stats_init(&mpls_stats->syncp);
}
+ mdev->dev = dev;
+
err = mpls_dev_sysctl_register(dev, mdev);
if (err)
goto free;
- mdev->dev = dev;
rcu_assign_pointer(dev->mpls_ptr, mdev);
return mdev;
@@ -1269,8 +1343,7 @@ static void mpls_ifdown(struct net_device *dev, int event)
{
struct mpls_route __rcu **platform_label;
struct net *net = dev_net(dev);
- unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN;
- unsigned int alive;
+ u8 alive, deleted;
unsigned index;
platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -1281,36 +1354,48 @@ static void mpls_ifdown(struct net_device *dev, int event)
continue;
alive = 0;
+ deleted = 0;
change_nexthops(rt) {
+ unsigned int nh_flags = nh->nh_flags;
+
if (rtnl_dereference(nh->nh_dev) != dev)
goto next;
switch (event) {
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
- nh->nh_flags |= RTNH_F_DEAD;
+ nh_flags |= RTNH_F_DEAD;
/* fall through */
case NETDEV_CHANGE:
- nh->nh_flags |= RTNH_F_LINKDOWN;
+ nh_flags |= RTNH_F_LINKDOWN;
break;
}
if (event == NETDEV_UNREGISTER)
RCU_INIT_POINTER(nh->nh_dev, NULL);
+
+ if (nh->nh_flags != nh_flags)
+ WRITE_ONCE(nh->nh_flags, nh_flags);
next:
- if (!(nh->nh_flags & nh_flags))
+ if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)))
alive++;
+ if (!rtnl_dereference(nh->nh_dev))
+ deleted++;
} endfor_nexthops(rt);
WRITE_ONCE(rt->rt_nhn_alive, alive);
+
+ /* if there are no more nexthops, delete the route */
+ if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn)
+ mpls_route_update(net, index, NULL, NULL);
}
}
-static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
+static void mpls_ifup(struct net_device *dev, unsigned int flags)
{
struct mpls_route __rcu **platform_label;
struct net *net = dev_net(dev);
unsigned index;
- int alive;
+ u8 alive;
platform_label = rtnl_dereference(net->mpls.platform_label);
for (index = 0; index < net->mpls.platform_labels; index++) {
@@ -1321,20 +1406,22 @@ static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
alive = 0;
change_nexthops(rt) {
+ unsigned int nh_flags = nh->nh_flags;
struct net_device *nh_dev =
rtnl_dereference(nh->nh_dev);
- if (!(nh->nh_flags & nh_flags)) {
+ if (!(nh_flags & flags)) {
alive++;
continue;
}
if (nh_dev != dev)
continue;
alive++;
- nh->nh_flags &= ~nh_flags;
+ nh_flags &= ~flags;
+ WRITE_ONCE(nh->nh_flags, flags);
} endfor_nexthops(rt);
- ACCESS_ONCE(rt->rt_nhn_alive) = alive;
+ WRITE_ONCE(rt->rt_nhn_alive, alive);
}
}
@@ -1385,7 +1472,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
mpls_ifdown(dev, event);
mdev = mpls_dev_get(dev);
if (mdev) {
- mpls_dev_sysctl_unregister(mdev);
+ mpls_dev_sysctl_unregister(dev, mdev);
RCU_INIT_POINTER(dev->mpls_ptr, NULL);
call_rcu(&mdev->rcu, mpls_dev_destroy_rcu);
}
@@ -1395,7 +1482,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
if (mdev) {
int err;
- mpls_dev_sysctl_unregister(mdev);
+ mpls_dev_sysctl_unregister(dev, mdev);
err = mpls_dev_sysctl_register(dev, mdev);
if (err)
return notifier_from_errno(err);
@@ -1455,16 +1542,18 @@ int nla_put_labels(struct sk_buff *skb, int attrtype,
EXPORT_SYMBOL_GPL(nla_put_labels);
int nla_get_labels(const struct nlattr *nla,
- u32 max_labels, u8 *labels, u32 label[])
+ u8 max_labels, u8 *labels, u32 label[])
{
unsigned len = nla_len(nla);
- unsigned nla_labels;
struct mpls_shim_hdr *nla_label;
+ u8 nla_labels;
bool bos;
int i;
- /* len needs to be an even multiple of 4 (the label size) */
- if (len & 3)
+ /* len needs to be an even multiple of 4 (the label size). Number
+ * of labels is a u8 so check for overflow.
+ */
+ if (len & 3 || len / 4 > 255)
return -EINVAL;
/* Limit the number of new labels allowed */
@@ -1472,6 +1561,10 @@ int nla_get_labels(const struct nlattr *nla,
if (nla_labels > max_labels)
return -EINVAL;
+ /* when label == NULL, caller wants number of labels */
+ if (!label)
+ goto out;
+
nla_label = nla_data(nla);
bos = true;
for (i = nla_labels - 1; i >= 0; i--, bos = false) {
@@ -1495,6 +1588,7 @@ int nla_get_labels(const struct nlattr *nla,
label[i] = dec.label;
}
+out:
*labels = nla_labels;
return 0;
}
@@ -1550,13 +1644,13 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
int index;
int err;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy,
+ NULL);
if (err < 0)
goto errout;
err = -EINVAL;
rtm = nlmsg_data(nlh);
- memset(cfg, 0, sizeof(*cfg));
if (rtm->rtm_family != AF_MPLS)
goto errout;
@@ -1584,6 +1678,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->rc_label = LABEL_NOT_SPECIFIED;
cfg->rc_protocol = rtm->rtm_protocol;
cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC;
+ cfg->rc_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
cfg->rc_nlflags = nlh->nlmsg_flags;
cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->rc_nlinfo.nlh = nlh;
@@ -1630,6 +1725,17 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->rc_mp_len = nla_len(nla);
break;
}
+ case RTA_TTL_PROPAGATE:
+ {
+ u8 ttl_propagate = nla_get_u8(nla);
+
+ if (ttl_propagate > 1)
+ goto errout;
+ cfg->rc_ttl_propagate = ttl_propagate ?
+ MPLS_TTL_PROP_ENABLED :
+ MPLS_TTL_PROP_DISABLED;
+ break;
+ }
default:
/* Unsupported attribute */
goto errout;
@@ -1641,29 +1747,47 @@ errout:
return err;
}
-static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- struct mpls_route_config cfg;
+ struct mpls_route_config *cfg;
int err;
- err = rtm_to_route_config(skb, nlh, &cfg);
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg)
+ return -ENOMEM;
+
+ err = rtm_to_route_config(skb, nlh, cfg);
if (err < 0)
- return err;
+ goto out;
+
+ err = mpls_route_del(cfg);
+out:
+ kfree(cfg);
- return mpls_route_del(&cfg);
+ return err;
}
-static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- struct mpls_route_config cfg;
+ struct mpls_route_config *cfg;
int err;
- err = rtm_to_route_config(skb, nlh, &cfg);
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg)
+ return -ENOMEM;
+
+ err = rtm_to_route_config(skb, nlh, cfg);
if (err < 0)
- return err;
+ goto out;
- return mpls_route_add(&cfg);
+ err = mpls_route_add(cfg);
+out:
+ kfree(cfg);
+
+ return err;
}
static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
@@ -1690,6 +1814,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (nla_put_labels(skb, RTA_DST, 1, &label))
goto nla_put_failure;
+
+ if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) {
+ bool ttl_propagate =
+ rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED;
+
+ if (nla_put_u8(skb, RTA_TTL_PROPAGATE,
+ ttl_propagate))
+ goto nla_put_failure;
+ }
if (rt->rt_nhn == 1) {
const struct mpls_nh *nh = rt->rt_nh;
@@ -1711,21 +1844,23 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
} else {
struct rtnexthop *rtnh;
struct nlattr *mp;
- int dead = 0;
- int linkdown = 0;
+ u8 linkdown = 0;
+ u8 dead = 0;
mp = nla_nest_start(skb, RTA_MULTIPATH);
if (!mp)
goto nla_put_failure;
for_nexthops(rt) {
+ dev = rtnl_dereference(nh->nh_dev);
+ if (!dev)
+ continue;
+
rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
if (!rtnh)
goto nla_put_failure;
- dev = rtnl_dereference(nh->nh_dev);
- if (dev)
- rtnh->rtnh_ifindex = dev->ifindex;
+ rtnh->rtnh_ifindex = dev->ifindex;
if (nh->nh_flags & RTNH_F_LINKDOWN) {
rtnh->rtnh_flags |= RTNH_F_LINKDOWN;
linkdown++;
@@ -1800,7 +1935,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
{
size_t payload =
NLMSG_ALIGN(sizeof(struct rtmsg))
- + nla_total_size(4); /* RTA_DST */
+ + nla_total_size(4) /* RTA_DST */
+ + nla_total_size(1); /* RTA_TTL_PROPAGATE */
if (rt->rt_nhn == 1) {
struct mpls_nh *nh = rt->rt_nh;
@@ -1816,6 +1952,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
size_t nhsize = 0;
for_nexthops(rt) {
+ if (!rtnl_dereference(nh->nh_dev))
+ continue;
nhsize += nla_total_size(sizeof(struct rtnexthop));
/* RTA_VIA */
if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC)
@@ -1878,12 +2016,13 @@ static int resize_platform_label_table(struct net *net, size_t limit)
/* In case the predefined labels need to be populated */
if (limit > MPLS_LABEL_IPV4NULL) {
struct net_device *lo = net->loopback_dev;
- rt0 = mpls_rt_alloc(1, lo->addr_len);
- if (!rt0)
+ rt0 = mpls_rt_alloc(1, lo->addr_len, 0);
+ if (IS_ERR(rt0))
goto nort0;
RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
rt0->rt_protocol = RTPROT_KERNEL;
rt0->rt_payload_type = MPT_IPV4;
+ rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
rt0->rt_nh->nh_via_alen = lo->addr_len;
memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
@@ -1891,12 +2030,13 @@ static int resize_platform_label_table(struct net *net, size_t limit)
}
if (limit > MPLS_LABEL_IPV6NULL) {
struct net_device *lo = net->loopback_dev;
- rt2 = mpls_rt_alloc(1, lo->addr_len);
- if (!rt2)
+ rt2 = mpls_rt_alloc(1, lo->addr_len, 0);
+ if (IS_ERR(rt2))
goto nort2;
RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
rt2->rt_protocol = RTPROT_KERNEL;
rt2->rt_payload_type = MPT_IPV6;
+ rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
rt2->rt_nh->nh_via_alen = lo->addr_len;
memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
@@ -1978,6 +2118,9 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
return ret;
}
+#define MPLS_NS_SYSCTL_OFFSET(field) \
+ (&((struct net *)0)->field)
+
static const struct ctl_table mpls_table[] = {
{
.procname = "platform_labels",
@@ -1986,21 +2129,47 @@ static const struct ctl_table mpls_table[] = {
.mode = 0644,
.proc_handler = mpls_platform_labels,
},
+ {
+ .procname = "ip_ttl_propagate",
+ .data = MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "default_ttl",
+ .data = MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &ttl_max,
+ },
{ }
};
static int mpls_net_init(struct net *net)
{
struct ctl_table *table;
+ int i;
net->mpls.platform_labels = 0;
net->mpls.platform_label = NULL;
+ net->mpls.ip_ttl_propagate = 1;
+ net->mpls.default_ttl = 255;
table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
if (table == NULL)
return -ENOMEM;
- table[0].data = net;
+ /* Table data contains only offsets relative to the base of
+ * the mdev at this point, so make them absolute.
+ */
+ for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
+ table[i].data = (char *)net + (uintptr_t)table[i].data;
+
net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
if (net->mpls.ctl == NULL) {
kfree(table);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 76360d8b9579..4db6a5971322 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -2,6 +2,11 @@
#define MPLS_INTERNAL_H
#include <net/mpls.h>
+/* put a reasonable limit on the number of labels
+ * we will accept from userspace
+ */
+#define MAX_NEW_LABELS 30
+
struct mpls_entry_decoded {
u32 label;
u8 ttl;
@@ -64,7 +69,6 @@ struct mpls_dev {
struct sk_buff;
#define LABEL_NOT_SPECIFIED (1 << 20)
-#define MAX_NEW_LABELS 2
/* This maximum ha length copied from the definition of struct neighbour */
#define VIA_ALEN_ALIGN sizeof(unsigned long)
@@ -83,11 +87,35 @@ enum mpls_payload_type {
struct mpls_nh { /* next hop label forwarding entry */
struct net_device __rcu *nh_dev;
+
+ /* nh_flags is accessed under RCU in the packet path; it is
+ * modified handling netdev events with rtnl lock held
+ */
unsigned int nh_flags;
- u32 nh_label[MAX_NEW_LABELS];
u8 nh_labels;
u8 nh_via_alen;
u8 nh_via_table;
+ u8 nh_reserved1;
+
+ u32 nh_label[0];
+};
+
+/* offset of via from beginning of mpls_nh */
+#define MPLS_NH_VIA_OFF(num_labels) \
+ ALIGN(sizeof(struct mpls_nh) + (num_labels) * sizeof(u32), \
+ VIA_ALEN_ALIGN)
+
+/* all nexthops within a route have the same size based on the
+ * max number of labels and max via length across all nexthops
+ */
+#define MPLS_NH_SIZE(num_labels, max_via_alen) \
+ (MPLS_NH_VIA_OFF((num_labels)) + \
+ ALIGN((max_via_alen), VIA_ALEN_ALIGN))
+
+enum mpls_ttl_propagation {
+ MPLS_TTL_PROP_DEFAULT,
+ MPLS_TTL_PROP_ENABLED,
+ MPLS_TTL_PROP_DISABLED,
};
/* The route, nexthops and vias are stored together in the same memory
@@ -98,16 +126,16 @@ struct mpls_nh { /* next hop label forwarding entry */
* +----------------------+
* | mpls_nh 0 |
* +----------------------+
- * | ... |
- * +----------------------+
- * | mpls_nh n-1 |
- * +----------------------+
- * | alignment padding |
+ * | alignment padding | 4 bytes for odd number of labels
* +----------------------+
* | via[rt_max_alen] 0 |
* +----------------------+
+ * | alignment padding | via's aligned on sizeof(unsigned long)
+ * +----------------------+
* | ... |
* +----------------------+
+ * | mpls_nh n-1 |
+ * +----------------------+
* | via[rt_max_alen] n-1 |
* +----------------------+
*/
@@ -116,22 +144,30 @@ struct mpls_route { /* next hop label forwarding entry */
u8 rt_protocol;
u8 rt_payload_type;
u8 rt_max_alen;
- unsigned int rt_nhn;
- unsigned int rt_nhn_alive;
+ u8 rt_ttl_propagate;
+ u8 rt_nhn;
+ /* rt_nhn_alive is accessed under RCU in the packet path; it
+ * is modified handling netdev events with rtnl lock held
+ */
+ u8 rt_nhn_alive;
+ u8 rt_nh_size;
+ u8 rt_via_offset;
+ u8 rt_reserved1;
struct mpls_nh rt_nh[0];
};
#define for_nexthops(rt) { \
- int nhsel; struct mpls_nh *nh; \
- for (nhsel = 0, nh = (rt)->rt_nh; \
+ int nhsel; struct mpls_nh *nh; u8 *__nh; \
+ for (nhsel = 0, nh = (rt)->rt_nh, __nh = (u8 *)((rt)->rt_nh); \
nhsel < (rt)->rt_nhn; \
- nh++, nhsel++)
+ __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++)
#define change_nexthops(rt) { \
- int nhsel; struct mpls_nh *nh; \
- for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh); \
+ int nhsel; struct mpls_nh *nh; u8 *__nh; \
+ for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh), \
+ __nh = (u8 *)((rt)->rt_nh); \
nhsel < (rt)->rt_nhn; \
- nh++, nhsel++)
+ __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++)
#define endfor_nexthops(rt) }
@@ -166,7 +202,7 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels,
const u32 label[]);
-int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels,
+int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
u32 label[]);
int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
u8 via[]);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index e4e4424f9eb1..369c7a23c86c 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -29,6 +29,7 @@
static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = {
[MPLS_IPTUNNEL_DST] = { .type = NLA_U32 },
+ [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 },
};
static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
@@ -49,6 +50,7 @@ static int mpls_xmit(struct sk_buff *skb)
struct rtable *rt = NULL;
struct rt6_info *rt6 = NULL;
struct mpls_dev *out_mdev;
+ struct net *net;
int err = 0;
bool bos;
int i;
@@ -56,17 +58,7 @@ static int mpls_xmit(struct sk_buff *skb)
/* Find the output device */
out_dev = dst->dev;
-
- /* Obtain the ttl */
- if (dst->ops->family == AF_INET) {
- ttl = ip_hdr(skb)->ttl;
- rt = (struct rtable *)dst;
- } else if (dst->ops->family == AF_INET6) {
- ttl = ipv6_hdr(skb)->hop_limit;
- rt6 = (struct rt6_info *)dst;
- } else {
- goto drop;
- }
+ net = dev_net(out_dev);
skb_orphan(skb);
@@ -78,6 +70,38 @@ static int mpls_xmit(struct sk_buff *skb)
tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
+ /* Obtain the ttl using the following set of rules.
+ *
+ * LWT ttl propagation setting:
+ * - disabled => use default TTL value from LWT
+ * - enabled => use TTL value from IPv4/IPv6 header
+ * - default =>
+ * Global ttl propagation setting:
+ * - disabled => use default TTL value from global setting
+ * - enabled => use TTL value from IPv4/IPv6 header
+ */
+ if (dst->ops->family == AF_INET) {
+ if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+ ttl = tun_encap_info->default_ttl;
+ else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ !net->mpls.ip_ttl_propagate)
+ ttl = net->mpls.default_ttl;
+ else
+ ttl = ip_hdr(skb)->ttl;
+ rt = (struct rtable *)dst;
+ } else if (dst->ops->family == AF_INET6) {
+ if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+ ttl = tun_encap_info->default_ttl;
+ else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ !net->mpls.ip_ttl_propagate)
+ ttl = net->mpls.default_ttl;
+ else
+ ttl = ipv6_hdr(skb)->hop_limit;
+ rt6 = (struct rt6_info *)dst;
+ } else {
+ goto drop;
+ }
+
/* Verify the destination can hold the packet */
new_header_size = mpls_encap_size(tun_encap_info);
mtu = mpls_dev_mtu(out_dev);
@@ -140,10 +164,11 @@ static int mpls_build_state(struct nlattr *nla,
struct mpls_iptunnel_encap *tun_encap_info;
struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
struct lwtunnel_state *newts;
+ u8 n_labels;
int ret;
ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla,
- mpls_iptunnel_policy);
+ mpls_iptunnel_policy, NULL);
if (ret < 0)
return ret;
@@ -151,15 +176,32 @@ static int mpls_build_state(struct nlattr *nla,
return -EINVAL;
- newts = lwtunnel_state_alloc(sizeof(*tun_encap_info));
+ /* determine number of labels */
+ if (nla_get_labels(tb[MPLS_IPTUNNEL_DST],
+ MAX_NEW_LABELS, &n_labels, NULL))
+ return -EINVAL;
+
+ newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) +
+ n_labels * sizeof(u32));
if (!newts)
return -ENOMEM;
tun_encap_info = mpls_lwtunnel_encap(newts);
- ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
+ ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels,
&tun_encap_info->labels, tun_encap_info->label);
if (ret)
goto errout;
+
+ tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT;
+
+ if (tb[MPLS_IPTUNNEL_TTL]) {
+ tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]);
+ /* TTL 0 implies propagate from IP header */
+ tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ?
+ MPLS_TTL_PROP_DISABLED :
+ MPLS_TTL_PROP_ENABLED;
+ }
+
newts->type = LWTUNNEL_ENCAP_MPLS;
newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
newts->headroom = mpls_encap_size(tun_encap_info);
@@ -186,6 +228,10 @@ static int mpls_fill_encap_info(struct sk_buff *skb,
tun_encap_info->label))
goto nla_put_failure;
+ if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT &&
+ nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -195,10 +241,16 @@ nla_put_failure:
static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
{
struct mpls_iptunnel_encap *tun_encap_info;
+ int nlsize;
tun_encap_info = mpls_lwtunnel_encap(lwtstate);
- return nla_total_size(tun_encap_info->labels * 4);
+ nlsize = nla_total_size(tun_encap_info->labels * 4);
+
+ if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT)
+ nlsize += nla_total_size(1);
+
+ return nlsize;
}
static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
@@ -207,10 +259,12 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b);
int l;
- if (a_hdr->labels != b_hdr->labels)
+ if (a_hdr->labels != b_hdr->labels ||
+ a_hdr->ttl_propagate != b_hdr->ttl_propagate ||
+ a_hdr->default_ttl != b_hdr->default_ttl)
return 1;
- for (l = 0; l < MAX_NEW_LABELS; l++)
+ for (l = 0; l < a_hdr->labels; l++)
if (a_hdr->label[l] != b_hdr->label[l])
return 1;
return 0;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index c296f9b606d4..9bd5b6636181 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -295,7 +295,8 @@ ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
if (unlikely(!flag_nested(nla)))
return -IPSET_ERR_PROTOCOL;
- if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
+ if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
+ ipaddr_policy, NULL))
return -IPSET_ERR_PROTOCOL;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
return -IPSET_ERR_PROTOCOL;
@@ -313,7 +314,8 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
if (unlikely(!flag_nested(nla)))
return -IPSET_ERR_PROTOCOL;
- if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
+ if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
+ ipaddr_policy, NULL))
return -IPSET_ERR_PROTOCOL;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
return -IPSET_ERR_PROTOCOL;
@@ -906,7 +908,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
/* Without holding any locks, create private part. */
if (attr[IPSET_ATTR_DATA] &&
nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
- set->type->create_policy)) {
+ set->type->create_policy, NULL)) {
ret = -IPSET_ERR_PROTOCOL;
goto put_out;
}
@@ -1257,8 +1259,8 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
ip_set_id_t index;
/* Second pass, so parser can't fail */
- nla_parse(cda, IPSET_ATTR_CMD_MAX,
- attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
+ nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len,
+ ip_set_setname_policy, NULL);
if (cda[IPSET_ATTR_SETNAME]) {
struct ip_set *set;
@@ -1305,7 +1307,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
* manually :-(
*/
if (nlh->nlmsg_flags & NLM_F_ACK)
- netlink_ack(cb->skb, nlh, ret);
+ netlink_ack(cb->skb, nlh, ret, NULL);
return ret;
}
}
@@ -1501,9 +1503,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
cmdattr = (void *)&errmsg->msg + min_len;
- nla_parse(cda, IPSET_ATTR_CMD_MAX,
- cmdattr, nlh->nlmsg_len - min_len,
- ip_set_adt_policy);
+ nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr,
+ nlh->nlmsg_len - min_len, ip_set_adt_policy, NULL);
errline = nla_data(cda[IPSET_ATTR_LINENO]);
@@ -1549,7 +1550,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
if (attr[IPSET_ATTR_DATA]) {
if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
attr[IPSET_ATTR_DATA],
- set->type->adt_policy))
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
use_lineno);
@@ -1561,7 +1562,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
if (nla_type(nla) != IPSET_ATTR_DATA ||
!flag_nested(nla) ||
nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
- set->type->adt_policy))
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
flags, use_lineno);
@@ -1603,7 +1604,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
if (attr[IPSET_ATTR_DATA]) {
if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
attr[IPSET_ATTR_DATA],
- set->type->adt_policy))
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
use_lineno);
@@ -1615,7 +1616,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
if (nla_type(nla) != IPSET_ATTR_DATA ||
!flag_nested(nla) ||
nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
- set->type->adt_policy))
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
flags, use_lineno);
@@ -1646,7 +1647,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
return -ENOENT;
if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
- set->type->adt_policy))
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
rcu_read_lock_bh();
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index e6a2753dff9e..3d2ac71a83ec 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -181,7 +181,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
cp->flags |= IP_VS_CONN_F_HASHED;
- atomic_inc(&cp->refcnt);
+ refcount_inc(&cp->refcnt);
hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
ret = 1;
} else {
@@ -215,7 +215,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_HASHED) {
hlist_del_rcu(&cp->c_list);
cp->flags &= ~IP_VS_CONN_F_HASHED;
- atomic_dec(&cp->refcnt);
+ refcount_dec(&cp->refcnt);
ret = 1;
} else
ret = 0;
@@ -242,13 +242,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_HASHED) {
ret = false;
/* Decrease refcnt and unlink conn only if we are last user */
- if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&cp->refcnt)) {
hlist_del_rcu(&cp->c_list);
cp->flags &= ~IP_VS_CONN_F_HASHED;
ret = true;
}
} else
- ret = atomic_read(&cp->refcnt) ? false : true;
+ ret = refcount_read(&cp->refcnt) ? false : true;
spin_unlock(&cp->lock);
ct_write_unlock_bh(hash);
@@ -475,7 +475,7 @@ static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp)
void ip_vs_conn_put(struct ip_vs_conn *cp)
{
if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) &&
- (atomic_read(&cp->refcnt) == 1) &&
+ (refcount_read(&cp->refcnt) == 1) &&
!timer_pending(&cp->timer))
/* expire connection immediately */
__ip_vs_conn_put_notimer(cp);
@@ -617,8 +617,8 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
- cp->flags, atomic_read(&cp->refcnt),
- atomic_read(&dest->refcnt));
+ cp->flags, refcount_read(&cp->refcnt),
+ refcount_read(&dest->refcnt));
/* Update the connection counters */
if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -714,8 +714,8 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
- cp->flags, atomic_read(&cp->refcnt),
- atomic_read(&dest->refcnt));
+ cp->flags, refcount_read(&cp->refcnt),
+ refcount_read(&dest->refcnt));
/* Update the connection counters */
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -863,10 +863,10 @@ static void ip_vs_conn_expire(unsigned long data)
expire_later:
IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n",
- atomic_read(&cp->refcnt),
+ refcount_read(&cp->refcnt),
atomic_read(&cp->n_control));
- atomic_inc(&cp->refcnt);
+ refcount_inc(&cp->refcnt);
cp->timeout = 60*HZ;
if (ipvs->sync_state & IP_VS_STATE_MASTER)
@@ -941,7 +941,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
* it in the table, so that other thread run ip_vs_random_dropentry
* but cannot drop this entry.
*/
- atomic_set(&cp->refcnt, 1);
+ refcount_set(&cp->refcnt, 1);
cp->control = NULL;
atomic_set(&cp->n_control, 0);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index db40050f8785..b4a746d0e39b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -542,7 +542,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
- cp->flags, atomic_read(&cp->refcnt));
+ cp->flags, refcount_read(&cp->refcnt));
ip_vs_conn_stats(cp, svc);
return cp;
@@ -1193,7 +1193,7 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
- cp->flags, atomic_read(&cp->refcnt));
+ cp->flags, refcount_read(&cp->refcnt));
LeaveFunction(12);
return cp;
}
@@ -2231,8 +2231,6 @@ static int __net_init __ip_vs_init(struct net *net)
if (ip_vs_sync_net_init(ipvs) < 0)
goto sync_fail;
- printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
- sizeof(struct netns_ipvs), ipvs->gen);
return 0;
/*
* Error handling
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5aeb0dde6ccc..892da70866d6 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -699,7 +699,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
dest->vfwmark,
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ refcount_read(&dest->refcnt));
if (dest->af == dest_af &&
ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
dest->port == dport &&
@@ -934,7 +934,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atomic_set(&dest->activeconns, 0);
atomic_set(&dest->inactconns, 0);
atomic_set(&dest->persistconns, 0);
- atomic_set(&dest->refcnt, 1);
+ refcount_set(&dest->refcnt, 1);
INIT_HLIST_NODE(&dest->d_list);
spin_lock_init(&dest->dst_lock);
@@ -998,7 +998,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
"dest->refcnt=%d, service %u/%s:%u\n",
IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
- atomic_read(&dest->refcnt),
+ refcount_read(&dest->refcnt),
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
ntohs(dest->vport));
@@ -1074,7 +1074,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
spin_lock_bh(&ipvs->dest_trash_lock);
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ refcount_read(&dest->refcnt));
if (list_empty(&ipvs->dest_trash) && !cleanup)
mod_timer(&ipvs->dest_trash_timer,
jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
@@ -1157,7 +1157,7 @@ static void ip_vs_dest_trash_expire(unsigned long data)
spin_lock(&ipvs->dest_trash_lock);
list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
- if (atomic_read(&dest->refcnt) > 1)
+ if (refcount_read(&dest->refcnt) > 1)
continue;
if (dest->idle_start) {
if (time_before(now, dest->idle_start +
@@ -1545,7 +1545,7 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
dev->name,
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ refcount_read(&dest->refcnt));
__ip_vs_dst_cache_reset(dest);
}
spin_unlock_bh(&dest->dst_lock);
@@ -3089,7 +3089,8 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
/* Parse mandatory identifying service fields first */
if (nla == NULL ||
- nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
+ nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla,
+ ip_vs_svc_policy, NULL))
return -EINVAL;
nla_af = attrs[IPVS_SVC_ATTR_AF];
@@ -3251,8 +3252,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
mutex_lock(&__ip_vs_mutex);
/* Try to find the service for which to dump destinations */
- if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
- IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
+ if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX,
+ ip_vs_cmd_policy, NULL))
goto out_err;
@@ -3288,7 +3289,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
/* Parse mandatory identifying destination fields first */
if (nla == NULL ||
- nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
+ nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla,
+ ip_vs_dest_policy, NULL))
return -EINVAL;
nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
@@ -3530,7 +3532,7 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
info->attrs[IPVS_CMD_ATTR_DAEMON],
- ip_vs_daemon_policy))
+ ip_vs_daemon_policy, info->extack))
goto out;
if (cmd == IPVS_CMD_NEW_DAEMON)
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 5824927cf8e0..b6aa4a970c6e 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -448,7 +448,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 703f11877bee..c13ff575f9f7 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -204,7 +204,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
}
@@ -249,7 +249,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
__func__,
IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port),
atomic_read(&most->activeconns),
- atomic_read(&most->refcnt),
+ refcount_read(&most->refcnt),
atomic_read(&most->weight), moh);
return most;
}
@@ -612,7 +612,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index a8b63401e773..7d9d4ac596ca 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -110,7 +110,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index d952d67f904d..56f8e4b204ff 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -447,7 +447,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
ntohs(cp->cport),
sctp_state_name(cp->state),
sctp_state_name(next_state),
- atomic_read(&cp->refcnt));
+ refcount_read(&cp->refcnt));
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
(next_state != IP_VS_SCTP_S_ESTABLISHED)) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 5117bcb7d2f0..12dc8d5bc37d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -557,7 +557,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
ntohs(cp->cport),
tcp_state_name(cp->state),
tcp_state_name(new_state),
- atomic_read(&cp->refcnt));
+ refcount_read(&cp->refcnt));
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index 58bacfc461ee..ee0530d14c5f 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -97,7 +97,7 @@ stop:
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
- atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+ refcount_read(&dest->refcnt), atomic_read(&dest->weight));
return dest;
}
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index f8e2d00f528b..ab23cf203437 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -111,7 +111,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 6b366fd90554..6add39e0ec20 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -83,7 +83,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 17e6d4406ca7..62258dd457ac 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -218,7 +218,7 @@ found:
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
- atomic_read(&dest->refcnt),
+ refcount_read(&dest->refcnt),
atomic_read(&dest->weight));
mark->cl = dest;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ffb78e5f7b70..3d621b8d7b8a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1133,7 +1133,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
/* Allocate a new conntrack: we return -ENOMEM if classification
failed due to stress. Otherwise it really is unclassifiable. */
-static struct nf_conntrack_tuple_hash *
+static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_l3proto *l3proto,
@@ -1241,21 +1241,20 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
}
-/* On success, returns conntrack ptr, sets skb->_nfct | ctinfo */
-static inline struct nf_conn *
+/* On success, returns 0, sets skb->_nfct | ctinfo */
+static int
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto,
- int *set_reply,
- enum ip_conntrack_info *ctinfo)
+ struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
+ enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
struct nf_conn *ct;
u32 hash;
@@ -1264,7 +1263,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
dataoff, l3num, protonum, net, &tuple, l3proto,
l4proto)) {
pr_debug("Can't get tuple\n");
- return NULL;
+ return 0;
}
/* look for tuple match */
@@ -1275,33 +1274,30 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
skb, dataoff, hash);
if (!h)
- return NULL;
+ return 0;
if (IS_ERR(h))
- return (void *)h;
+ return PTR_ERR(h);
}
ct = nf_ct_tuplehash_to_ctrack(h);
/* It exists; we have (non-exclusive) reference. */
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
- *ctinfo = IP_CT_ESTABLISHED_REPLY;
- /* Please set reply bit if this packet OK */
- *set_reply = 1;
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
} else {
/* Once we've had two way comms, always ESTABLISHED. */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
pr_debug("normal packet for %p\n", ct);
- *ctinfo = IP_CT_ESTABLISHED;
+ ctinfo = IP_CT_ESTABLISHED;
} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
pr_debug("related packet for %p\n", ct);
- *ctinfo = IP_CT_RELATED;
+ ctinfo = IP_CT_RELATED;
} else {
pr_debug("new packet for %p\n", ct);
- *ctinfo = IP_CT_NEW;
+ ctinfo = IP_CT_NEW;
}
- *set_reply = 0;
}
- nf_ct_set(skb, ct, *ctinfo);
- return ct;
+ nf_ct_set(skb, ct, ctinfo);
+ return 0;
}
unsigned int
@@ -1315,7 +1311,6 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
- int set_reply = 0;
int ret;
tmpl = nf_ct_get(skb, &ctinfo);
@@ -1358,23 +1353,22 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
goto out;
}
repeat:
- ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
- l3proto, l4proto, &set_reply, &ctinfo);
- if (!ct) {
- /* Not valid part of a connection */
- NF_CT_STAT_INC_ATOMIC(net, invalid);
- ret = NF_ACCEPT;
- goto out;
- }
-
- if (IS_ERR(ct)) {
+ ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
+ l3proto, l4proto);
+ if (ret < 0) {
/* Too stressed to deal. */
NF_CT_STAT_INC_ATOMIC(net, drop);
ret = NF_DROP;
goto out;
}
- NF_CT_ASSERT(skb_nfct(skb));
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct) {
+ /* Not valid part of a connection */
+ NF_CT_STAT_INC_ATOMIC(net, invalid);
+ ret = NF_ACCEPT;
+ goto out;
+ }
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
@@ -1399,7 +1393,8 @@ repeat:
goto out;
}
- if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+ if (ctinfo == IP_CT_ESTABLISHED_REPLY &&
+ !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_REPLY, ct);
out:
if (tmpl)
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index d80073037856..a5ca5e426bae 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -133,7 +133,7 @@ nf_ct_expect_find_get(struct net *net,
rcu_read_lock();
i = __nf_ct_expect_find(net, zone, tuple);
- if (i && !atomic_inc_not_zero(&i->use))
+ if (i && !refcount_inc_not_zero(&i->use))
i = NULL;
rcu_read_unlock();
@@ -186,7 +186,7 @@ nf_ct_find_expectation(struct net *net,
return NULL;
if (exp->flags & NF_CT_EXPECT_PERMANENT) {
- atomic_inc(&exp->use);
+ refcount_inc(&exp->use);
return exp;
} else if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
@@ -275,7 +275,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
return NULL;
new->master = me;
- atomic_set(&new->use, 1);
+ refcount_set(&new->use, 1);
return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
@@ -348,7 +348,7 @@ static void nf_ct_expect_free_rcu(struct rcu_head *head)
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
- if (atomic_dec_and_test(&exp->use))
+ if (refcount_dec_and_test(&exp->use))
call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);
@@ -361,7 +361,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
/* two references : one for hash insert, one for the timer */
- atomic_add(2, &exp->use);
+ refcount_add(2, &exp->use);
hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
master_help->expecting[exp->class]++;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dc7dfd68fafe..aafd25dff8c0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -908,7 +908,7 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
struct nf_conntrack_l3proto *l3proto;
int ret = 0;
- ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL);
+ ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL);
if (ret < 0)
return ret;
@@ -917,7 +917,7 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
if (likely(l3proto->nlattr_to_tuple)) {
ret = nla_validate_nested(attr, CTA_IP_MAX,
- l3proto->nla_policy);
+ l3proto->nla_policy, NULL);
if (ret == 0)
ret = l3proto->nlattr_to_tuple(tb, tuple);
}
@@ -938,7 +938,8 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
struct nf_conntrack_l4proto *l4proto;
int ret = 0;
- ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy);
+ ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy,
+ NULL);
if (ret < 0)
return ret;
@@ -951,7 +952,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
if (likely(l4proto->nlattr_to_tuple)) {
ret = nla_validate_nested(attr, CTA_PROTO_MAX,
- l4proto->nla_policy);
+ l4proto->nla_policy, NULL);
if (ret == 0)
ret = l4proto->nlattr_to_tuple(tb, tuple);
}
@@ -1015,7 +1016,8 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
memset(tuple, 0, sizeof(*tuple));
- err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy);
+ err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy,
+ NULL);
if (err < 0)
return err;
@@ -1065,7 +1067,7 @@ static int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name,
int err;
struct nlattr *tb[CTA_HELP_MAX+1];
- err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy);
+ err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy, NULL);
if (err < 0)
return err;
@@ -1571,7 +1573,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
struct nf_conntrack_l4proto *l4proto;
int err = 0;
- err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy);
+ err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy,
+ NULL);
if (err < 0)
return err;
@@ -1596,7 +1599,7 @@ static int change_seq_adj(struct nf_ct_seqadj *seq,
int err;
struct nlattr *cda[CTA_SEQADJ_MAX+1];
- err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy);
+ err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy, NULL);
if (err < 0)
return err;
@@ -2353,7 +2356,7 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct)
struct nlattr *cda[CTA_MAX+1];
int ret;
- ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy);
+ ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy, NULL);
if (ret < 0)
return ret;
@@ -2390,7 +2393,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
struct nf_conntrack_expect *exp;
int err;
- err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy);
+ err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy,
+ NULL);
if (err < 0)
return err;
@@ -2698,7 +2702,7 @@ restart:
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
- if (!atomic_inc_not_zero(&exp->use))
+ if (!refcount_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
goto out;
@@ -2744,7 +2748,7 @@ restart:
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
- if (!atomic_inc_not_zero(&exp->use))
+ if (!refcount_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
goto out;
@@ -3015,7 +3019,8 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
struct nf_conntrack_tuple nat_tuple = {};
int err;
- err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy);
+ err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr,
+ exp_nat_nla_policy, NULL);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 93dd1c5b7bff..b2e02dfe7fa8 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -665,7 +665,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
err = nla_parse_nested(tb, CTA_PROTOINFO_DCCP_MAX, attr,
- dccp_nla_policy);
+ dccp_nla_policy, NULL);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 33279aab583d..2a7300587c87 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -584,10 +584,8 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
if (!attr)
return 0;
- err = nla_parse_nested(tb,
- CTA_PROTOINFO_SCTP_MAX,
- attr,
- sctp_nla_policy);
+ err = nla_parse_nested(tb, CTA_PROTOINFO_SCTP_MAX, attr,
+ sctp_nla_policy, NULL);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b122e9dacfed..85bde77ad967 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1234,7 +1234,8 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
if (!pattr)
return 0;
- err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
+ err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr,
+ tcp_nla_policy, NULL);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 82802e4a6640..908ba5abbc0b 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -751,7 +751,8 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr,
const struct nf_nat_l4proto *l4proto;
int err;
- err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
+ err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr,
+ protonat_nla_policy, NULL);
if (err < 0)
return err;
@@ -780,7 +781,7 @@ nfnetlink_parse_nat(const struct nlattr *nat,
memset(range, 0, sizeof(*range));
- err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
+ err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy, NULL);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 434c739dfeca..907431318637 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1182,7 +1182,8 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
struct nft_stats *stats;
int err;
- err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
+ err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy,
+ NULL);
if (err < 0)
return ERR_PTR(err);
@@ -1257,7 +1258,7 @@ static int nft_chain_parse_hook(struct net *net,
int err;
err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
- nft_hook_policy);
+ nft_hook_policy, NULL);
if (err < 0)
return err;
@@ -1724,7 +1725,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
struct nlattr *tb[NFTA_EXPR_MAX + 1];
int err;
- err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy);
+ err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy, NULL);
if (err < 0)
return err;
@@ -1734,7 +1735,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
if (tb[NFTA_EXPR_DATA]) {
err = nla_parse_nested(info->tb, type->maxattr,
- tb[NFTA_EXPR_DATA], type->policy);
+ tb[NFTA_EXPR_DATA], type->policy, NULL);
if (err < 0)
goto err1;
} else
@@ -1772,8 +1773,19 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx,
goto err1;
}
+ if (ops->validate) {
+ const struct nft_data *data = NULL;
+
+ err = ops->validate(ctx, expr, &data);
+ if (err < 0)
+ goto err2;
+ }
+
return 0;
+err2:
+ if (ops->destroy)
+ ops->destroy(ctx, expr);
err1:
expr->ops = NULL;
return err;
@@ -2523,8 +2535,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
return 0;
}
-struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
- const struct nlattr *nla, u8 genmask)
+static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
{
struct nft_set *set;
@@ -2538,11 +2550,10 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
}
return ERR_PTR(-ENOENT);
}
-EXPORT_SYMBOL_GPL(nf_tables_set_lookup);
-struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
- const struct nlattr *nla,
- u8 genmask)
+static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_trans *trans;
u32 id = ntohl(nla_get_be32(nla));
@@ -2557,7 +2568,25 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
}
return ERR_PTR(-ENOENT);
}
-EXPORT_SYMBOL_GPL(nf_tables_set_lookup_byid);
+
+struct nft_set *nft_set_lookup(const struct net *net,
+ const struct nft_table *table,
+ const struct nlattr *nla_set_name,
+ const struct nlattr *nla_set_id,
+ u8 genmask)
+{
+ struct nft_set *set;
+
+ set = nf_tables_set_lookup(table, nla_set_name, genmask);
+ if (IS_ERR(set)) {
+ if (!nla_set_id)
+ return set;
+
+ set = nf_tables_set_lookup_byid(net, nla_set_id, genmask);
+ }
+ return set;
+}
+EXPORT_SYMBOL_GPL(nft_set_lookup);
static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
const char *name)
@@ -2851,7 +2880,8 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
struct nlattr *da[NFTA_SET_DESC_MAX + 1];
int err;
- err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy);
+ err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla,
+ nft_set_desc_policy, NULL);
if (err < 0)
return err;
@@ -3353,7 +3383,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
int event, err;
err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla,
- NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy);
+ NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy,
+ NULL);
if (err < 0)
return err;
@@ -3612,7 +3643,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
int err;
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
- nft_set_elem_policy);
+ nft_set_elem_policy, NULL);
if (err < 0)
return err;
@@ -3842,7 +3873,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
int err;
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
- nft_set_elem_policy);
+ nft_set_elem_policy, NULL);
if (err < 0)
goto err1;
@@ -4064,7 +4095,8 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
[NFTA_OBJ_DATA] = { .type = NLA_NESTED },
};
-static struct nft_object *nft_obj_init(const struct nft_object_type *type,
+static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
+ const struct nft_object_type *type,
const struct nlattr *attr)
{
struct nlattr *tb[type->maxattr + 1];
@@ -4072,7 +4104,8 @@ static struct nft_object *nft_obj_init(const struct nft_object_type *type,
int err;
if (attr) {
- err = nla_parse_nested(tb, type->maxattr, attr, type->policy);
+ err = nla_parse_nested(tb, type->maxattr, attr, type->policy,
+ NULL);
if (err < 0)
goto err1;
} else {
@@ -4084,7 +4117,7 @@ static struct nft_object *nft_obj_init(const struct nft_object_type *type,
if (obj == NULL)
goto err1;
- err = type->init((const struct nlattr * const *)tb, obj);
+ err = type->init(ctx, (const struct nlattr * const *)tb, obj);
if (err < 0)
goto err2;
@@ -4192,7 +4225,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
if (IS_ERR(type))
return PTR_ERR(type);
- obj = nft_obj_init(type, nla[NFTA_OBJ_DATA]);
+ obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err1;
@@ -5285,7 +5318,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
struct nft_chain *chain;
int err;
- err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy);
+ err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy,
+ NULL);
if (err < 0)
return err;
@@ -5415,7 +5449,7 @@ int nft_data_init(const struct nft_ctx *ctx,
struct nlattr *tb[NFTA_DATA_MAX + 1];
int err;
- err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy);
+ err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy, NULL);
if (err < 0)
return err;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 68eda920160e..e42f858b91d2 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -148,7 +148,8 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
EXPORT_SYMBOL_GPL(nfnetlink_unicast);
/* Process one complete nfnetlink message. */
-static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
const struct nfnl_callback *nc;
@@ -191,8 +192,8 @@ replay:
int attrlen = nlh->nlmsg_len - min_len;
__u8 subsys_id = NFNL_SUBSYS_ID(type);
- err = nla_parse(cda, ss->cb[cb_id].attr_count,
- attr, attrlen, ss->cb[cb_id].policy);
+ err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen,
+ ss->cb[cb_id].policy, extack);
if (err < 0) {
rcu_read_unlock();
return err;
@@ -261,7 +262,7 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
struct nfnl_err *nfnl_err, *next;
list_for_each_entry_safe(nfnl_err, next, err_list, head) {
- netlink_ack(skb, nfnl_err->nlh, nfnl_err->err);
+ netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, NULL);
nfnl_err_del(nfnl_err);
}
}
@@ -284,13 +285,13 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
if (subsys_id >= NFNL_SUBSYS_COUNT)
- return netlink_ack(skb, nlh, -EINVAL);
+ return netlink_ack(skb, nlh, -EINVAL, NULL);
replay:
status = 0;
skb = netlink_skb_clone(oskb, GFP_KERNEL);
if (!skb)
- return netlink_ack(oskb, nlh, -ENOMEM);
+ return netlink_ack(oskb, nlh, -ENOMEM, NULL);
nfnl_lock(subsys_id);
ss = nfnl_dereference_protected(subsys_id);
@@ -304,20 +305,20 @@ replay:
#endif
{
nfnl_unlock(subsys_id);
- netlink_ack(oskb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
return kfree_skb(skb);
}
}
if (!ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
- netlink_ack(oskb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
return kfree_skb(skb);
}
if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) {
nfnl_unlock(subsys_id);
- netlink_ack(oskb, nlh, -ERESTART);
+ netlink_ack(oskb, nlh, -ERESTART, NULL);
return kfree_skb(skb);
}
@@ -376,8 +377,8 @@ replay:
struct nlattr *attr = (void *)nlh + min_len;
int attrlen = nlh->nlmsg_len - min_len;
- err = nla_parse(cda, ss->cb[cb_id].attr_count,
- attr, attrlen, ss->cb[cb_id].policy);
+ err = nla_parse(cda, ss->cb[cb_id].attr_count, attr,
+ attrlen, ss->cb[cb_id].policy, NULL);
if (err < 0)
goto ack;
@@ -407,7 +408,8 @@ ack:
* pointing to the batch header.
*/
nfnl_err_reset(&err_list);
- netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
+ netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM,
+ NULL);
status |= NFNL_BATCH_FAILURE;
goto done;
}
@@ -465,9 +467,10 @@ static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh)
skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
return;
- err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy);
+ err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy,
+ NULL);
if (err < 0) {
- netlink_ack(skb, nlh, err);
+ netlink_ack(skb, nlh, err, NULL);
return;
}
if (cda[NFNL_BATCH_GENID])
@@ -493,7 +496,7 @@ static void nfnetlink_rcv(struct sk_buff *skb)
return;
if (!netlink_net_capable(skb, CAP_NET_ADMIN)) {
- netlink_ack(skb, nlh, -EPERM);
+ netlink_ack(skb, nlh, -EPERM, NULL);
return;
}
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index d44d89b56127..2837d5fb98bd 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/slab.h>
@@ -32,7 +33,7 @@ struct nf_acct {
atomic64_t bytes;
unsigned long flags;
struct list_head head;
- atomic_t refcnt;
+ refcount_t refcnt;
char name[NFACCT_NAME_MAX];
struct rcu_head rcu_head;
char data[0];
@@ -123,7 +124,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
atomic64_set(&nfacct->pkts,
be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
}
- atomic_set(&nfacct->refcnt, 1);
+ refcount_set(&nfacct->refcnt, 1);
list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list);
return 0;
}
@@ -166,7 +167,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
NFACCT_PAD) ||
nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes),
NFACCT_PAD) ||
- nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
+ nla_put_be32(skb, NFACCT_USE, htonl(refcount_read(&acct->refcnt))))
goto nla_put_failure;
if (acct->flags & NFACCT_F_QUOTA) {
u64 *quota = (u64 *)acct->data;
@@ -243,7 +244,8 @@ nfacct_filter_alloc(const struct nlattr * const attr)
struct nlattr *tb[NFACCT_FILTER_MAX + 1];
int err;
- err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy);
+ err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy,
+ NULL);
if (err < 0)
return ERR_PTR(err);
@@ -329,7 +331,7 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
/* We want to avoid races with nfnl_acct_put. So only when the current
* refcnt is 1, we decrease it to 0.
*/
- if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&cur->refcnt)) {
/* We are protected by nfnl mutex. */
list_del_rcu(&cur->head);
kfree_rcu(cur, rcu_head);
@@ -413,7 +415,7 @@ struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
if (!try_module_get(THIS_MODULE))
goto err;
- if (!atomic_inc_not_zero(&cur->refcnt)) {
+ if (!refcount_inc_not_zero(&cur->refcnt)) {
module_put(THIS_MODULE);
goto err;
}
@@ -429,7 +431,7 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
void nfnl_acct_put(struct nf_acct *acct)
{
- if (atomic_dec_and_test(&acct->refcnt))
+ if (refcount_dec_and_test(&acct->refcnt))
kfree_rcu(acct, rcu_head);
module_put(THIS_MODULE);
@@ -502,7 +504,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net)
list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) {
list_del_rcu(&cur->head);
- if (atomic_dec_and_test(&cur->refcnt))
+ if (refcount_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
}
}
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index d45558178da5..5b6c68311566 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -77,7 +77,8 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
int err;
struct nlattr *tb[NFCTH_TUPLE_MAX+1];
- err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol);
+ err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr,
+ nfnl_cthelper_tuple_pol, NULL);
if (err < 0)
return err;
@@ -137,7 +138,8 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy,
int err;
struct nlattr *tb[NFCTH_POLICY_MAX+1];
- err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol);
+ err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
+ nfnl_cthelper_expect_pol, NULL);
if (err < 0)
return err;
@@ -171,7 +173,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
unsigned int class_max;
ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
- nfnl_cthelper_expect_policy_set);
+ nfnl_cthelper_expect_policy_set, NULL);
if (ret < 0)
return ret;
@@ -276,7 +278,7 @@ nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
int err;
err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
- nfnl_cthelper_expect_pol);
+ nfnl_cthelper_expect_pol, NULL);
if (err < 0)
return err;
@@ -336,7 +338,7 @@ static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,
int err;
err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
- nfnl_cthelper_expect_policy_set);
+ nfnl_cthelper_expect_policy_set, NULL);
if (err < 0)
return err;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 47d6656c9119..0a3510e7e396 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -56,7 +56,8 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1];
ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
- attr, l4proto->ctnl_timeout.nla_policy);
+ attr, l4proto->ctnl_timeout.nla_policy,
+ NULL);
if (ret < 0)
return ret;
@@ -138,7 +139,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME]));
timeout->l3num = l3num;
timeout->l4proto = l4proto;
- atomic_set(&timeout->refcnt, 1);
+ refcount_set(&timeout->refcnt, 1);
list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
return 0;
@@ -172,7 +173,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) ||
nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
nla_put_be32(skb, CTA_TIMEOUT_USE,
- htonl(atomic_read(&timeout->refcnt))))
+ htonl(refcount_read(&timeout->refcnt))))
goto nla_put_failure;
if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
@@ -339,7 +340,7 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
/* We want to avoid races with ctnl_timeout_put. So only when the
* current refcnt is 1, we decrease it to 0.
*/
- if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&timeout->refcnt)) {
/* We are protected by nfnl mutex. */
list_del_rcu(&timeout->head);
nf_ct_l4proto_put(timeout->l4proto);
@@ -536,7 +537,7 @@ ctnl_timeout_find_get(struct net *net, const char *name)
if (!try_module_get(THIS_MODULE))
goto err;
- if (!atomic_inc_not_zero(&timeout->refcnt)) {
+ if (!refcount_inc_not_zero(&timeout->refcnt)) {
module_put(THIS_MODULE);
goto err;
}
@@ -550,7 +551,7 @@ err:
static void ctnl_timeout_put(struct ctnl_timeout *timeout)
{
- if (atomic_dec_and_test(&timeout->refcnt))
+ if (refcount_dec_and_test(&timeout->refcnt))
kfree_rcu(timeout, rcu_head);
module_put(THIS_MODULE);
@@ -601,7 +602,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
list_del_rcu(&cur->head);
nf_ct_l4proto_put(cur->l4proto);
- if (atomic_dec_and_test(&cur->refcnt))
+ if (refcount_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
}
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 08247bf7d7b8..ecd857b75ffe 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -40,6 +40,8 @@
#include <net/netfilter/nfnetlink_log.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
+
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#include "../bridge/br_private.h"
@@ -57,7 +59,7 @@
struct nfulnl_instance {
struct hlist_node hlist; /* global list of instances */
spinlock_t lock;
- atomic_t use; /* use count */
+ refcount_t use; /* use count */
unsigned int qlen; /* number of nlmsgs in skb */
struct sk_buff *skb; /* pre-allocatd skb */
@@ -115,7 +117,7 @@ __instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
static inline void
instance_get(struct nfulnl_instance *inst)
{
- atomic_inc(&inst->use);
+ refcount_inc(&inst->use);
}
static struct nfulnl_instance *
@@ -125,7 +127,7 @@ instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
rcu_read_lock_bh();
inst = __instance_lookup(log, group_num);
- if (inst && !atomic_inc_not_zero(&inst->use))
+ if (inst && !refcount_inc_not_zero(&inst->use))
inst = NULL;
rcu_read_unlock_bh();
@@ -145,7 +147,7 @@ static void nfulnl_instance_free_rcu(struct rcu_head *head)
static void
instance_put(struct nfulnl_instance *inst)
{
- if (inst && atomic_dec_and_test(&inst->use))
+ if (inst && refcount_dec_and_test(&inst->use))
call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
}
@@ -180,7 +182,7 @@ instance_create(struct net *net, u_int16_t group_num,
INIT_HLIST_NODE(&inst->hlist);
spin_lock_init(&inst->lock);
/* needs to be two, since we _put() after creation */
- atomic_set(&inst->use, 2);
+ refcount_set(&inst->use, 2);
setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
@@ -1031,7 +1033,7 @@ static int seq_show(struct seq_file *s, void *v)
inst->group_num,
inst->peer_portid, inst->qlen,
inst->copy_mode, inst->copy_range,
- inst->flushtimeout, atomic_read(&inst->use));
+ inst->flushtimeout, refcount_read(&inst->use));
return 0;
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 933509ebf3d3..3be6fef30581 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1109,7 +1109,7 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry,
int err;
err = nla_parse_nested(tb, NFQA_VLAN_MAX, nfqa[NFQA_VLAN],
- nfqa_vlan_policy);
+ nfqa_vlan_policy, NULL);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index c21e7eb8dce0..d76d0f36799f 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -200,7 +200,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
int err;
err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr,
- nft_rule_compat_policy);
+ nft_rule_compat_policy, NULL);
if (err < 0)
return err;
@@ -230,10 +230,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
union nft_entry e = {};
int ret;
- ret = nft_compat_chain_validate_dependency(target->table, ctx->chain);
- if (ret < 0)
- goto err;
-
target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info);
if (ctx->nla[NFTA_RULE_COMPAT]) {
@@ -419,10 +415,6 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
union nft_entry e = {};
int ret;
- ret = nft_compat_chain_validate_dependency(match->table, ctx->chain);
- if (ret < 0)
- goto err;
-
match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info);
if (ctx->nla[NFTA_RULE_COMPAT]) {
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 7f8422213341..67a710ebde09 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -82,7 +82,8 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
return 0;
}
-static int nft_counter_obj_init(const struct nlattr * const tb[],
+static int nft_counter_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
struct nft_object *obj)
{
struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 0264258c46fe..640fe5a5865e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -32,6 +32,12 @@ struct nft_ct {
};
};
+struct nft_ct_helper_obj {
+ struct nf_conntrack_helper *helper4;
+ struct nf_conntrack_helper *helper6;
+ u8 l4proto;
+};
+
#ifdef CONFIG_NF_CONNTRACK_ZONES
static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
@@ -733,6 +739,162 @@ static struct nft_expr_type nft_notrack_type __read_mostly = {
.owner = THIS_MODULE,
};
+static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ struct nf_conntrack_helper *help4, *help6;
+ char name[NF_CT_HELPER_NAME_LEN];
+ int family = ctx->afi->family;
+
+ if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
+ return -EINVAL;
+
+ priv->l4proto = nla_get_u8(tb[NFTA_CT_HELPER_L4PROTO]);
+ if (!priv->l4proto)
+ return -ENOENT;
+
+ nla_strlcpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name));
+
+ if (tb[NFTA_CT_HELPER_L3PROTO])
+ family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO]));
+
+ help4 = NULL;
+ help6 = NULL;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ if (ctx->afi->family == NFPROTO_IPV6)
+ return -EINVAL;
+
+ help4 = nf_conntrack_helper_try_module_get(name, family,
+ priv->l4proto);
+ break;
+ case NFPROTO_IPV6:
+ if (ctx->afi->family == NFPROTO_IPV4)
+ return -EINVAL;
+
+ help6 = nf_conntrack_helper_try_module_get(name, family,
+ priv->l4proto);
+ break;
+ case NFPROTO_NETDEV: /* fallthrough */
+ case NFPROTO_BRIDGE: /* same */
+ case NFPROTO_INET:
+ help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4,
+ priv->l4proto);
+ help6 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV6,
+ priv->l4proto);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ /* && is intentional; only error if INET found neither ipv4 or ipv6 */
+ if (!help4 && !help6)
+ return -ENOENT;
+
+ priv->helper4 = help4;
+ priv->helper6 = help6;
+
+ return 0;
+}
+
+static void nft_ct_helper_obj_destroy(struct nft_object *obj)
+{
+ struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+
+ if (priv->helper4)
+ module_put(priv->helper4->me);
+ if (priv->helper6)
+ module_put(priv->helper6->me);
+}
+
+static void nft_ct_helper_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
+ struct nf_conntrack_helper *to_assign = NULL;
+ struct nf_conn_help *help;
+
+ if (!ct ||
+ nf_ct_is_confirmed(ct) ||
+ nf_ct_is_template(ct) ||
+ priv->l4proto != nf_ct_protonum(ct))
+ return;
+
+ switch (nf_ct_l3num(ct)) {
+ case NFPROTO_IPV4:
+ to_assign = priv->helper4;
+ break;
+ case NFPROTO_IPV6:
+ to_assign = priv->helper6;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ if (!to_assign)
+ return;
+
+ if (test_bit(IPS_HELPER_BIT, &ct->status))
+ return;
+
+ help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC);
+ if (help) {
+ rcu_assign_pointer(help->helper, to_assign);
+ set_bit(IPS_HELPER_BIT, &ct->status);
+ }
+}
+
+static int nft_ct_helper_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ const struct nf_conntrack_helper *helper = priv->helper4;
+ u16 family;
+
+ if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name))
+ return -1;
+
+ if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto))
+ return -1;
+
+ if (priv->helper4 && priv->helper6)
+ family = NFPROTO_INET;
+ else if (priv->helper6)
+ family = NFPROTO_IPV6;
+ else
+ family = NFPROTO_IPV4;
+
+ if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family)))
+ return -1;
+
+ return 0;
+}
+
+static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
+ [NFTA_CT_HELPER_NAME] = { .type = NLA_STRING,
+ .len = NF_CT_HELPER_NAME_LEN - 1 },
+ [NFTA_CT_HELPER_L3PROTO] = { .type = NLA_U16 },
+ [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
+};
+
+static struct nft_object_type nft_ct_helper_obj __read_mostly = {
+ .type = NFT_OBJECT_CT_HELPER,
+ .size = sizeof(struct nft_ct_helper_obj),
+ .maxattr = NFTA_CT_HELPER_MAX,
+ .policy = nft_ct_helper_policy,
+ .eval = nft_ct_helper_obj_eval,
+ .init = nft_ct_helper_obj_init,
+ .destroy = nft_ct_helper_obj_destroy,
+ .dump = nft_ct_helper_obj_dump,
+ .owner = THIS_MODULE,
+};
+
static int __init nft_ct_module_init(void)
{
int err;
@@ -747,7 +909,14 @@ static int __init nft_ct_module_init(void)
if (err < 0)
goto err1;
+ err = nft_register_obj(&nft_ct_helper_obj);
+ if (err < 0)
+ goto err2;
+
return 0;
+
+err2:
+ nft_unregister_expr(&nft_notrack_type);
err1:
nft_unregister_expr(&nft_ct_type);
return err;
@@ -755,6 +924,7 @@ err1:
static void __exit nft_ct_module_exit(void)
{
+ nft_unregister_obj(&nft_ct_helper_obj);
nft_unregister_expr(&nft_notrack_type);
nft_unregister_expr(&nft_ct_type);
}
@@ -766,3 +936,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("ct");
MODULE_ALIAS_NFT_EXPR("notrack");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 049ad2d9ee66..3948da380259 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -133,16 +133,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
priv->invert = true;
}
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME],
- genmask);
- if (IS_ERR(set)) {
- if (tb[NFTA_DYNSET_SET_ID])
- set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_DYNSET_SET_ID],
- genmask);
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_DYNSET_SET_NAME],
+ tb[NFTA_DYNSET_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (set->ops->update == NULL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index c308920b194c..d212a85d2f33 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -98,14 +98,21 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
goto err;
offset = i + priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- memcpy(dest, opt + offset, priv->len);
+ if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+ *dest = 1;
+ } else {
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ memcpy(dest, opt + offset, priv->len);
+ }
return;
}
err:
- regs->verdict.code = NFT_BREAK;
+ if (priv->flags & NFT_EXTHDR_F_PRESENT)
+ *dest = 0;
+ else
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 29a4906adc27..21df8cccea65 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -24,7 +24,8 @@ const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
EXPORT_SYMBOL(nft_fib_policy);
#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
- NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)
+ NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
+ NFTA_FIB_F_PRESENT)
int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nft_data **data)
@@ -112,7 +113,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (err < 0)
return err;
- return nft_fib_validate(ctx, expr, NULL);
+ return 0;
}
EXPORT_SYMBOL_GPL(nft_fib_init);
@@ -133,19 +134,22 @@ int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
}
EXPORT_SYMBOL_GPL(nft_fib_dump);
-void nft_fib_store_result(void *reg, enum nft_fib_result r,
+void nft_fib_store_result(void *reg, const struct nft_fib *priv,
const struct nft_pktinfo *pkt, int index)
{
struct net_device *dev;
u32 *dreg = reg;
- switch (r) {
+ switch (priv->result) {
case NFT_FIB_RESULT_OIF:
- *dreg = index;
+ *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index;
break;
case NFT_FIB_RESULT_OIFNAME:
dev = dev_get_by_index_rcu(nft_net(pkt), index);
- strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+ if (priv->flags & NFTA_FIB_F_PRESENT)
+ *dreg = !!dev;
+ else
+ strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
break;
default:
WARN_ON_ONCE(1);
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index c4dad1254ead..52a5079a91a3 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -17,7 +17,7 @@
#include <net/netfilter/nf_tables_core.h>
#include <linux/jhash.h>
-struct nft_hash {
+struct nft_jhash {
enum nft_registers sreg:8;
enum nft_registers dreg:8;
u8 len;
@@ -27,11 +27,11 @@ struct nft_hash {
u32 offset;
};
-static void nft_hash_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static void nft_jhash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
- struct nft_hash *priv = nft_expr_priv(expr);
+ struct nft_jhash *priv = nft_expr_priv(expr);
const void *data = &regs->data[priv->sreg];
u32 h;
@@ -39,6 +39,25 @@ static void nft_hash_eval(const struct nft_expr *expr,
regs->data[priv->dreg] = h + priv->offset;
}
+struct nft_symhash {
+ enum nft_registers dreg:8;
+ u32 modulus;
+ u32 offset;
+};
+
+static void nft_symhash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_symhash *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ u32 h;
+
+ h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus);
+
+ regs->data[priv->dreg] = h + priv->offset;
+}
+
static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
[NFTA_HASH_SREG] = { .type = NLA_U32 },
[NFTA_HASH_DREG] = { .type = NLA_U32 },
@@ -46,13 +65,14 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
[NFTA_HASH_MODULUS] = { .type = NLA_U32 },
[NFTA_HASH_SEED] = { .type = NLA_U32 },
[NFTA_HASH_OFFSET] = { .type = NLA_U32 },
+ [NFTA_HASH_TYPE] = { .type = NLA_U32 },
};
-static int nft_hash_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_jhash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- struct nft_hash *priv = nft_expr_priv(expr);
+ struct nft_jhash *priv = nft_expr_priv(expr);
u32 len;
int err;
@@ -95,10 +115,36 @@ static int nft_hash_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, sizeof(u32));
}
-static int nft_hash_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+static int nft_symhash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_symhash *priv = nft_expr_priv(expr);
+
+ if (!tb[NFTA_HASH_DREG] ||
+ !tb[NFTA_HASH_MODULUS])
+ return -EINVAL;
+
+ if (tb[NFTA_HASH_OFFSET])
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
+
+ priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
+
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+ if (priv->modulus <= 1)
+ return -ERANGE;
+
+ if (priv->offset + priv->modulus - 1 < priv->offset)
+ return -EOVERFLOW;
+
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_jhash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
- const struct nft_hash *priv = nft_expr_priv(expr);
+ const struct nft_jhash *priv = nft_expr_priv(expr);
if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg))
goto nla_put_failure;
@@ -114,6 +160,28 @@ static int nft_hash_dump(struct sk_buff *skb,
if (priv->offset != 0)
if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_JENKINS)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_symhash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_symhash *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
+ goto nla_put_failure;
+ if (priv->offset != 0)
+ if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_SYM)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -121,17 +189,46 @@ nla_put_failure:
}
static struct nft_expr_type nft_hash_type;
-static const struct nft_expr_ops nft_hash_ops = {
+static const struct nft_expr_ops nft_jhash_ops = {
.type = &nft_hash_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)),
- .eval = nft_hash_eval,
- .init = nft_hash_init,
- .dump = nft_hash_dump,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_jhash)),
+ .eval = nft_jhash_eval,
+ .init = nft_jhash_init,
+ .dump = nft_jhash_dump,
};
+static const struct nft_expr_ops nft_symhash_ops = {
+ .type = &nft_hash_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)),
+ .eval = nft_symhash_eval,
+ .init = nft_symhash_init,
+ .dump = nft_symhash_dump,
+};
+
+static const struct nft_expr_ops *
+nft_hash_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ u32 type;
+
+ if (!tb[NFTA_HASH_TYPE])
+ return &nft_jhash_ops;
+
+ type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE]));
+ switch (type) {
+ case NFT_HASH_SYM:
+ return &nft_symhash_ops;
+ case NFT_HASH_JENKINS:
+ return &nft_jhash_ops;
+ default:
+ break;
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static struct nft_expr_type nft_hash_type __read_mostly = {
.name = "hash",
- .ops = &nft_hash_ops,
+ .select_ops = &nft_hash_select_ops,
.policy = nft_hash_policy,
.maxattr = NFTA_HASH_MAX,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index c6baf412236d..18dd57a52651 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -17,9 +17,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-static DEFINE_SPINLOCK(limit_lock);
-
struct nft_limit {
+ spinlock_t lock;
u64 last;
u64 tokens;
u64 tokens_max;
@@ -34,7 +33,7 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
u64 now, tokens;
s64 delta;
- spin_lock_bh(&limit_lock);
+ spin_lock_bh(&limit->lock);
now = ktime_get_ns();
tokens = limit->tokens + now - limit->last;
if (tokens > limit->tokens_max)
@@ -44,11 +43,11 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
delta = tokens - cost;
if (delta >= 0) {
limit->tokens = delta;
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&limit->lock);
return limit->invert;
}
limit->tokens = tokens;
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&limit->lock);
return !limit->invert;
}
@@ -86,6 +85,7 @@ static int nft_limit_init(struct nft_limit *limit,
limit->invert = true;
}
limit->last = ktime_get_ns();
+ spin_lock_init(&limit->lock);
return 0;
}
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index e21aea7e5ec8..475570e89ede 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -71,16 +71,10 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
tb[NFTA_LOOKUP_SREG] == NULL)
return -EINVAL;
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask);
- if (IS_ERR(set)) {
- if (tb[NFTA_LOOKUP_SET_ID]) {
- set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_LOOKUP_SET_ID],
- genmask);
- }
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET],
+ tb[NFTA_LOOKUP_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (set->flags & NFT_SET_EVAL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 11ce016cd479..6ac03d4266c9 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -46,10 +46,6 @@ int nft_masq_init(const struct nft_ctx *ctx,
struct nft_masq *priv = nft_expr_priv(expr);
int err;
- err = nft_masq_validate(ctx, expr, NULL);
- if (err)
- return err;
-
if (tb[NFTA_MASQ_FLAGS]) {
priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
if (priv->flags & ~NF_NAT_RANGE_MASK)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 7b60e01f38ff..9563ce3c23aa 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -372,10 +372,6 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- err = nft_meta_set_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
-
priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 439e0bd152a0..ed548d06b6dd 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -138,10 +138,6 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
}
- err = nft_nat_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
-
if (tb[NFTA_NAT_FAMILY] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 1ae8c49ca4a1..1dd428fbaaa3 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -116,16 +116,10 @@ static int nft_objref_map_init(const struct nft_ctx *ctx,
struct nft_set *set;
int err;
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_OBJREF_SET_NAME], genmask);
- if (IS_ERR(set)) {
- if (tb[NFTA_OBJREF_SET_ID]) {
- set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_OBJREF_SET_ID],
- genmask);
- }
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_OBJREF_SET_NAME],
+ tb[NFTA_OBJREF_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (!(set->flags & NFT_SET_OBJECT))
return -EINVAL;
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 2d6fe3559912..25e33159be57 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -99,7 +99,8 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
return 0;
}
-static int nft_quota_obj_init(const struct nlattr * const tb[],
+static int nft_quota_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
struct nft_object *obj)
{
struct nft_quota *priv = nft_obj_data(obj);
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 40dcd05146d5..1e66538bf0ff 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -47,10 +47,6 @@ int nft_redir_init(const struct nft_ctx *ctx,
unsigned int plen;
int err;
- err = nft_redir_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
-
plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
priv->sreg_proto_min =
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index c64de3f7379d..29f5bd2377b0 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -42,11 +42,6 @@ int nft_reject_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int err;
-
- err = nft_reject_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 9e90a02cb104..5a7fb5ff867d 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -66,11 +66,7 @@ static int nft_reject_inet_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int icmp_code, err;
-
- err = nft_reject_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
+ int icmp_code;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 78dfbf9588b3..e97e2fb53f0a 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -18,9 +18,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-static DEFINE_SPINLOCK(nft_rbtree_lock);
-
struct nft_rbtree {
+ rwlock_t lock;
struct rb_root root;
};
@@ -44,14 +43,14 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
- const struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
u8 genmask = nft_genmask_cur(net);
const struct rb_node *parent;
const void *this;
int d;
- spin_lock_bh(&nft_rbtree_lock);
+ read_lock_bh(&priv->lock);
parent = priv->root.rb_node;
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -75,7 +74,7 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
}
if (nft_rbtree_interval_end(rbe))
goto out;
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
*ext = &rbe->ext;
return true;
@@ -85,12 +84,12 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_interval_end(interval)) {
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
*ext = &interval->ext;
return true;
}
out:
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
return false;
}
@@ -140,12 +139,13 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext)
{
+ struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->priv;
int err;
- spin_lock_bh(&nft_rbtree_lock);
+ write_lock_bh(&priv->lock);
err = __nft_rbtree_insert(net, set, rbe, ext);
- spin_unlock_bh(&nft_rbtree_lock);
+ write_unlock_bh(&priv->lock);
return err;
}
@@ -157,9 +157,9 @@ static void nft_rbtree_remove(const struct net *net,
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->priv;
- spin_lock_bh(&nft_rbtree_lock);
+ write_lock_bh(&priv->lock);
rb_erase(&rbe->node, &priv->root);
- spin_unlock_bh(&nft_rbtree_lock);
+ write_unlock_bh(&priv->lock);
}
static void nft_rbtree_activate(const struct net *net,
@@ -224,12 +224,12 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_set *set,
struct nft_set_iter *iter)
{
- const struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
struct nft_set_elem elem;
struct rb_node *node;
- spin_lock_bh(&nft_rbtree_lock);
+ read_lock_bh(&priv->lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
rbe = rb_entry(node, struct nft_rbtree_elem, node);
@@ -242,13 +242,13 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0) {
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
return;
}
cont:
iter->count++;
}
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
}
static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
@@ -262,6 +262,7 @@ static int nft_rbtree_init(const struct nft_set *set,
{
struct nft_rbtree *priv = nft_set_priv(set);
+ rwlock_init(&priv->lock);
priv->root = RB_ROOT;
return 0;
}
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index dab962df1787..d27b5f1ea619 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -18,6 +18,7 @@
#include <linux/netfilter/xt_limit.h>
struct xt_limit_priv {
+ spinlock_t lock;
unsigned long prev;
uint32_t credit;
};
@@ -32,8 +33,6 @@ MODULE_ALIAS("ip6t_limit");
* see net/sched/sch_tbf.c in the linux source tree
*/
-static DEFINE_SPINLOCK(limit_lock);
-
/* Rusty: This is my (non-mathematically-inclined) understanding of
this algorithm. The `average rate' in jiffies becomes your initial
amount of credit `credit' and the most credit you can ever have
@@ -72,7 +71,7 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
struct xt_limit_priv *priv = r->master;
unsigned long now = jiffies;
- spin_lock_bh(&limit_lock);
+ spin_lock_bh(&priv->lock);
priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY;
if (priv->credit > r->credit_cap)
priv->credit = r->credit_cap;
@@ -80,11 +79,11 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (priv->credit >= r->cost) {
/* We're not limited. */
priv->credit -= r->cost;
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&priv->lock);
return true;
}
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&priv->lock);
return false;
}
@@ -126,6 +125,8 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
r->credit_cap = priv->credit; /* Credits full. */
r->cost = user2credits(r->avg);
}
+ spin_lock_init(&priv->lock);
+
return 0;
}
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 4149d3e63589..9aacf2da3d98 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -101,7 +101,7 @@ static int netlbl_cipsov4_add_common(struct genl_info *info,
if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST],
NLBL_CIPSOV4_A_MAX,
- netlbl_cipsov4_genl_policy) != 0)
+ netlbl_cipsov4_genl_policy, NULL) != 0)
return -EINVAL;
nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem)
@@ -148,7 +148,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
NLBL_CIPSOV4_A_MAX,
- netlbl_cipsov4_genl_policy) != 0)
+ netlbl_cipsov4_genl_policy, NULL) != 0)
return -EINVAL;
doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
@@ -170,10 +170,10 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
info->attrs[NLBL_CIPSOV4_A_MLSLVLLST],
nla_a_rem)
if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) {
- if (nla_validate_nested(nla_a,
- NLBL_CIPSOV4_A_MAX,
- netlbl_cipsov4_genl_policy) != 0)
- goto add_std_failure;
+ if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy,
+ NULL) != 0)
+ goto add_std_failure;
nla_for_each_nested(nla_b, nla_a, nla_b_rem)
switch (nla_type(nla_b)) {
case NLBL_CIPSOV4_A_MLSLVLLOC:
@@ -236,7 +236,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) {
if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST],
NLBL_CIPSOV4_A_MAX,
- netlbl_cipsov4_genl_policy) != 0)
+ netlbl_cipsov4_genl_policy, NULL) != 0)
goto add_std_failure;
nla_for_each_nested(nla_a,
@@ -244,8 +244,9 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
nla_a_rem)
if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) {
if (nla_validate_nested(nla_a,
- NLBL_CIPSOV4_A_MAX,
- netlbl_cipsov4_genl_policy) != 0)
+ NLBL_CIPSOV4_A_MAX,
+ netlbl_cipsov4_genl_policy,
+ NULL) != 0)
goto add_std_failure;
nla_for_each_nested(nla_b, nla_a, nla_b_rem)
switch (nla_type(nla_b)) {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 596eaff66649..ee841f00a6ec 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -78,14 +78,6 @@ struct listeners {
/* state bits */
#define NETLINK_S_CONGESTED 0x0
-/* flags */
-#define NETLINK_F_KERNEL_SOCKET 0x1
-#define NETLINK_F_RECV_PKTINFO 0x2
-#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
-#define NETLINK_F_RECV_NO_ENOBUFS 0x8
-#define NETLINK_F_LISTEN_ALL_NSID 0x10
-#define NETLINK_F_CAP_ACK 0x20
-
static inline int netlink_is_kernel(struct sock *sk)
{
return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
@@ -1660,6 +1652,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
nlk->flags &= ~NETLINK_F_CAP_ACK;
err = 0;
break;
+ case NETLINK_EXT_ACK:
+ if (val)
+ nlk->flags |= NETLINK_F_EXT_ACK;
+ else
+ nlk->flags &= ~NETLINK_F_EXT_ACK;
+ err = 0;
+ break;
default:
err = -ENOPROTOOPT;
}
@@ -1744,6 +1743,15 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
err = 0;
break;
+ case NETLINK_EXT_ACK:
+ if (len < sizeof(int))
+ return -EINVAL;
+ len = sizeof(int);
+ val = nlk->flags & NETLINK_F_EXT_ACK ? 1 : 0;
+ if (put_user(len, optlen) || put_user(val, optval))
+ return -EFAULT;
+ err = 0;
+ break;
default:
err = -ENOPROTOOPT;
}
@@ -2275,21 +2283,44 @@ error_free:
}
EXPORT_SYMBOL(__netlink_dump_start);
-void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
+void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
+ const struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
struct nlmsghdr *rep;
struct nlmsgerr *errmsg;
size_t payload = sizeof(*errmsg);
+ size_t tlvlen = 0;
struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
+ unsigned int flags = 0;
/* Error messages get the original request appened, unless the user
- * requests to cap the error message.
+ * requests to cap the error message, and get extra error data if
+ * requested.
*/
- if (!(nlk->flags & NETLINK_F_CAP_ACK) && err)
- payload += nlmsg_len(nlh);
+ if (err) {
+ if (!(nlk->flags & NETLINK_F_CAP_ACK))
+ payload += nlmsg_len(nlh);
+ else
+ flags |= NLM_F_CAPPED;
+ if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
+ if (extack->_msg)
+ tlvlen += nla_total_size(strlen(extack->_msg) + 1);
+ if (extack->bad_attr)
+ tlvlen += nla_total_size(sizeof(u32));
+ }
+ } else {
+ flags |= NLM_F_CAPPED;
+
+ if (nlk->flags & NETLINK_F_EXT_ACK &&
+ extack && extack->cookie_len)
+ tlvlen += nla_total_size(extack->cookie_len);
+ }
+
+ if (tlvlen)
+ flags |= NLM_F_ACK_TLVS;
- skb = nlmsg_new(payload, GFP_KERNEL);
+ skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
if (!skb) {
struct sock *sk;
@@ -2305,17 +2336,42 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
}
rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- NLMSG_ERROR, payload, 0);
+ NLMSG_ERROR, payload, flags);
errmsg = nlmsg_data(rep);
errmsg->error = err;
memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
+
+ if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
+ if (err) {
+ if (extack->_msg)
+ WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
+ extack->_msg));
+ if (extack->bad_attr &&
+ !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
+ (u8 *)extack->bad_attr >= in_skb->data +
+ in_skb->len))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
+ (u8 *)extack->bad_attr -
+ in_skb->data));
+ } else {
+ if (extack->cookie_len)
+ WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
+ extack->cookie_len,
+ extack->cookie));
+ }
+ }
+
+ nlmsg_end(skb, rep);
+
netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
}
EXPORT_SYMBOL(netlink_ack);
int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
- struct nlmsghdr *))
+ struct nlmsghdr *,
+ struct netlink_ext_ack *))
{
+ struct netlink_ext_ack extack = {};
struct nlmsghdr *nlh;
int err;
@@ -2336,13 +2392,13 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
goto ack;
- err = cb(skb, nlh);
+ err = cb(skb, nlh, &extack);
if (err == -EINTR)
goto skip;
ack:
if (nlh->nlmsg_flags & NLM_F_ACK || err)
- netlink_ack(skb, nlh, err);
+ netlink_ack(skb, nlh, err, &extack);
skip:
msglen = NLMSG_ALIGN(nlh->nlmsg_len);
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 4fdb38318977..3490f2430532 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -6,6 +6,15 @@
#include <linux/workqueue.h>
#include <net/sock.h>
+/* flags */
+#define NETLINK_F_KERNEL_SOCKET 0x1
+#define NETLINK_F_RECV_PKTINFO 0x2
+#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
+#define NETLINK_F_RECV_NO_ENOBUFS 0x8
+#define NETLINK_F_LISTEN_ALL_NSID 0x10
+#define NETLINK_F_CAP_ACK 0x20
+#define NETLINK_F_EXT_ACK 0x40
+
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index a5546249fb10..8faa20b4d457 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -19,6 +19,27 @@ static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
nlk->groups);
}
+static int sk_diag_put_flags(struct sock *sk, struct sk_buff *skb)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+ u32 flags = 0;
+
+ if (nlk->cb_running)
+ flags |= NDIAG_FLAG_CB_RUNNING;
+ if (nlk->flags & NETLINK_F_RECV_PKTINFO)
+ flags |= NDIAG_FLAG_PKTINFO;
+ if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
+ flags |= NDIAG_FLAG_BROADCAST_ERROR;
+ if (nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)
+ flags |= NDIAG_FLAG_NO_ENOBUFS;
+ if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ flags |= NDIAG_FLAG_LISTEN_ALL_NSID;
+ if (nlk->flags & NETLINK_F_CAP_ACK)
+ flags |= NDIAG_FLAG_CAP_ACK;
+
+ return nla_put_u32(skb, NETLINK_DIAG_FLAGS, flags);
+}
+
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
struct netlink_diag_req *req,
u32 portid, u32 seq, u32 flags, int sk_ino)
@@ -52,6 +73,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
goto out_nlmsg_trim;
+ if ((req->ndiag_show & NDIAG_SHOW_FLAGS) &&
+ sk_diag_put_flags(sk, skb))
+ goto out_nlmsg_trim;
+
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 92e0981f7404..10f8b4cff40a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -497,7 +497,8 @@ static int genl_lock_done(struct netlink_callback *cb)
static int genl_family_rcv_msg(const struct genl_family *family,
struct sk_buff *skb,
- struct nlmsghdr *nlh)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
const struct genl_ops *ops;
struct net *net = sock_net(skb->sk);
@@ -573,7 +574,7 @@ static int genl_family_rcv_msg(const struct genl_family *family,
if (attrbuf) {
err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
- ops->policy);
+ ops->policy, extack);
if (err < 0)
goto out;
}
@@ -584,6 +585,7 @@ static int genl_family_rcv_msg(const struct genl_family *family,
info.genlhdr = nlmsg_data(nlh);
info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
info.attrs = attrbuf;
+ info.extack = extack;
genl_info_net_set(&info, net);
memset(&info.user_ptr, 0, sizeof(info.user_ptr));
@@ -605,7 +607,8 @@ out:
return err;
}
-static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
const struct genl_family *family;
int err;
@@ -617,7 +620,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!family->parallel_ops)
genl_lock();
- err = genl_family_rcv_msg(family, skb, nlh);
+ err = genl_family_rcv_msg(family, skb, nlh, extack);
if (!family->parallel_ops)
genl_unlock();
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 03f3d5c7beb8..6b0850e63e09 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -119,7 +119,8 @@ static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
u32 idx;
rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize,
- attrbuf, nfc_genl_family.maxattr, nfc_genl_policy);
+ attrbuf, nfc_genl_family.maxattr, nfc_genl_policy,
+ NULL);
if (rc < 0)
return ERR_PTR(rc);
@@ -303,6 +304,17 @@ free_msg:
return -EMSGSIZE;
}
+static int nfc_genl_setup_device_added(struct nfc_dev *dev, struct sk_buff *msg)
+{
+ if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) ||
+ nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
+ nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) ||
+ nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) ||
+ nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode))
+ return -1;
+ return 0;
+}
+
int nfc_genl_device_added(struct nfc_dev *dev)
{
struct sk_buff *msg;
@@ -317,10 +329,7 @@ int nfc_genl_device_added(struct nfc_dev *dev)
if (!hdr)
goto free_msg;
- if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) ||
- nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
- nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) ||
- nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up))
+ if (nfc_genl_setup_device_added(dev, msg))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -596,11 +605,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
if (cb)
genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
- if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) ||
- nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
- nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) ||
- nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) ||
- nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode))
+ if (nfc_genl_setup_device_added(dev, msg))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -918,7 +923,7 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
rc = nfc_activate_target(dev, target_idx, protocol);
nfc_put_device(dev);
- return 0;
+ return rc;
}
static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info)
@@ -1161,7 +1166,7 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) {
rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr,
- nfc_sdp_genl_policy);
+ nfc_sdp_genl_policy, info->extack);
if (rc != 0) {
rc = -EINVAL;
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c82301ce3fff..e4610676299b 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2014 Nicira, Inc.
+ * Copyright (c) 2007-2017 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -44,13 +44,10 @@
#include "conntrack.h"
#include "vport.h"
-static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key,
- const struct nlattr *attr, int len);
-
struct deferred_action {
struct sk_buff *skb;
const struct nlattr *actions;
+ int actions_len;
/* Store pkt_key clone when creating deferred action. */
struct sw_flow_key pkt_key;
@@ -82,14 +79,31 @@ struct action_fifo {
struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};
-struct recirc_keys {
+struct action_flow_keys {
struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
};
static struct action_fifo __percpu *action_fifos;
-static struct recirc_keys __percpu *recirc_keys;
+static struct action_flow_keys __percpu *flow_keys;
static DEFINE_PER_CPU(int, exec_actions_level);
+/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
+ * space. Return NULL if out of key spaces.
+ */
+static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
+{
+ struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
+ int level = this_cpu_read(exec_actions_level);
+ struct sw_flow_key *key = NULL;
+
+ if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
+ key = &keys->key[level - 1];
+ *key = *key_;
+ }
+
+ return key;
+}
+
static void action_fifo_init(struct action_fifo *fifo)
{
fifo->head = 0;
@@ -119,8 +133,9 @@ static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
/* Return true if fifo is not full */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
- const struct sw_flow_key *key,
- const struct nlattr *attr)
+ const struct sw_flow_key *key,
+ const struct nlattr *actions,
+ const int actions_len)
{
struct action_fifo *fifo;
struct deferred_action *da;
@@ -129,7 +144,8 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
da = action_fifo_put(fifo);
if (da) {
da->skb = skb;
- da->actions = attr;
+ da->actions = actions;
+ da->actions_len = actions_len;
da->pkt_key = *key;
}
@@ -146,6 +162,12 @@ static bool is_flow_key_valid(const struct sw_flow_key *key)
return !(key->mac_proto & SW_FLOW_KEY_INVALID);
}
+static int clone_execute(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key,
+ u32 recirc_id,
+ const struct nlattr *actions, int len,
+ bool last, bool clone_flow_key);
+
static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
__be16 ethertype)
{
@@ -908,72 +930,35 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
}
+/* When 'last' is true, sample() should always consume the 'skb'.
+ * Otherwise, sample() should keep 'skb' intact regardless what
+ * actions are executed within sample().
+ */
static int sample(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
- const struct nlattr *actions, int actions_len)
+ bool last)
{
- const struct nlattr *acts_list = NULL;
- const struct nlattr *a;
- int rem;
- u32 cutlen = 0;
-
- for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
- a = nla_next(a, &rem)) {
- u32 probability;
-
- switch (nla_type(a)) {
- case OVS_SAMPLE_ATTR_PROBABILITY:
- probability = nla_get_u32(a);
- if (!probability || prandom_u32() > probability)
- return 0;
- break;
-
- case OVS_SAMPLE_ATTR_ACTIONS:
- acts_list = a;
- break;
- }
- }
-
- rem = nla_len(acts_list);
- a = nla_data(acts_list);
-
- /* Actions list is empty, do nothing */
- if (unlikely(!rem))
+ struct nlattr *actions;
+ struct nlattr *sample_arg;
+ int rem = nla_len(attr);
+ const struct sample_arg *arg;
+ bool clone_flow_key;
+
+ /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
+ sample_arg = nla_data(attr);
+ arg = nla_data(sample_arg);
+ actions = nla_next(sample_arg, &rem);
+
+ if ((arg->probability != U32_MAX) &&
+ (!arg->probability || prandom_u32() > arg->probability)) {
+ if (last)
+ consume_skb(skb);
return 0;
-
- /* The only known usage of sample action is having a single user-space
- * action, or having a truncate action followed by a single user-space
- * action. Treat this usage as a special case.
- * The output_userspace() should clone the skb to be sent to the
- * user space. This skb will be consumed by its caller.
- */
- if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) {
- struct ovs_action_trunc *trunc = nla_data(a);
-
- if (skb->len > trunc->max_len)
- cutlen = skb->len - trunc->max_len;
-
- a = nla_next(a, &rem);
}
- if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
- nla_is_last(a, rem)))
- return output_userspace(dp, skb, key, a, actions,
- actions_len, cutlen);
-
- skb = skb_clone(skb, GFP_ATOMIC);
- if (!skb)
- /* Skip the sample action when out of memory. */
- return 0;
-
- if (!add_deferred_actions(skb, key, a)) {
- if (net_ratelimit())
- pr_warn("%s: deferred actions limit reached, dropping sample action\n",
- ovs_dp_name(dp));
-
- kfree_skb(skb);
- }
- return 0;
+ clone_flow_key = !arg->exec;
+ return clone_execute(dp, skb, key, 0, actions, rem, last,
+ clone_flow_key);
}
static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
@@ -1084,10 +1069,9 @@ static int execute_masked_set_action(struct sk_buff *skb,
static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
- const struct nlattr *a, int rem)
+ const struct nlattr *a, bool last)
{
- struct deferred_action *da;
- int level;
+ u32 recirc_id;
if (!is_flow_key_valid(key)) {
int err;
@@ -1098,43 +1082,8 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
}
BUG_ON(!is_flow_key_valid(key));
- if (!nla_is_last(a, rem)) {
- /* Recirc action is the not the last action
- * of the action list, need to clone the skb.
- */
- skb = skb_clone(skb, GFP_ATOMIC);
-
- /* Skip the recirc action when out of memory, but
- * continue on with the rest of the action list.
- */
- if (!skb)
- return 0;
- }
-
- level = this_cpu_read(exec_actions_level);
- if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
- struct recirc_keys *rks = this_cpu_ptr(recirc_keys);
- struct sw_flow_key *recirc_key = &rks->key[level - 1];
-
- *recirc_key = *key;
- recirc_key->recirc_id = nla_get_u32(a);
- ovs_dp_process_packet(skb, recirc_key);
-
- return 0;
- }
-
- da = add_deferred_actions(skb, key, NULL);
- if (da) {
- da->pkt_key.recirc_id = nla_get_u32(a);
- } else {
- kfree_skb(skb);
-
- if (net_ratelimit())
- pr_warn("%s: deferred action limit reached, drop recirc action\n",
- ovs_dp_name(dp));
- }
-
- return 0;
+ recirc_id = nla_get_u32(a);
+ return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
}
/* Execute a list of actions against 'skb'. */
@@ -1206,9 +1155,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = pop_vlan(skb, key);
break;
- case OVS_ACTION_ATTR_RECIRC:
- err = execute_recirc(dp, skb, key, a, rem);
- if (nla_is_last(a, rem)) {
+ case OVS_ACTION_ATTR_RECIRC: {
+ bool last = nla_is_last(a, rem);
+
+ err = execute_recirc(dp, skb, key, a, last);
+ if (last) {
/* If this is the last action, the skb has
* been consumed or freed.
* Return immediately.
@@ -1216,6 +1167,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return err;
}
break;
+ }
case OVS_ACTION_ATTR_SET:
err = execute_set_action(skb, key, nla_data(a));
@@ -1226,9 +1178,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = execute_masked_set_action(skb, key, nla_data(a));
break;
- case OVS_ACTION_ATTR_SAMPLE:
- err = sample(dp, skb, key, a, attr, len);
+ case OVS_ACTION_ATTR_SAMPLE: {
+ bool last = nla_is_last(a, rem);
+
+ err = sample(dp, skb, key, a, last);
+ if (last)
+ return err;
+
break;
+ }
case OVS_ACTION_ATTR_CT:
if (!is_flow_key_valid(key)) {
@@ -1264,6 +1222,79 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return 0;
}
+/* Execute the actions on the clone of the packet. The effect of the
+ * execution does not affect the original 'skb' nor the original 'key'.
+ *
+ * The execution may be deferred in case the actions can not be executed
+ * immediately.
+ */
+static int clone_execute(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key, u32 recirc_id,
+ const struct nlattr *actions, int len,
+ bool last, bool clone_flow_key)
+{
+ struct deferred_action *da;
+ struct sw_flow_key *clone;
+
+ skb = last ? skb : skb_clone(skb, GFP_ATOMIC);
+ if (!skb) {
+ /* Out of memory, skip this action.
+ */
+ return 0;
+ }
+
+ /* When clone_flow_key is false, the 'key' will not be change
+ * by the actions, then the 'key' can be used directly.
+ * Otherwise, try to clone key from the next recursion level of
+ * 'flow_keys'. If clone is successful, execute the actions
+ * without deferring.
+ */
+ clone = clone_flow_key ? clone_key(key) : key;
+ if (clone) {
+ int err = 0;
+
+ if (actions) { /* Sample action */
+ if (clone_flow_key)
+ __this_cpu_inc(exec_actions_level);
+
+ err = do_execute_actions(dp, skb, clone,
+ actions, len);
+
+ if (clone_flow_key)
+ __this_cpu_dec(exec_actions_level);
+ } else { /* Recirc action */
+ clone->recirc_id = recirc_id;
+ ovs_dp_process_packet(skb, clone);
+ }
+ return err;
+ }
+
+ /* Out of 'flow_keys' space. Defer actions */
+ da = add_deferred_actions(skb, key, actions, len);
+ if (da) {
+ if (!actions) { /* Recirc action */
+ key = &da->pkt_key;
+ key->recirc_id = recirc_id;
+ }
+ } else {
+ /* Out of per CPU action FIFO space. Drop the 'skb' and
+ * log an error.
+ */
+ kfree_skb(skb);
+
+ if (net_ratelimit()) {
+ if (actions) { /* Sample action */
+ pr_warn("%s: deferred action limit reached, drop sample action\n",
+ ovs_dp_name(dp));
+ } else { /* Recirc action */
+ pr_warn("%s: deferred action limit reached, drop recirc action\n",
+ ovs_dp_name(dp));
+ }
+ }
+ }
+ return 0;
+}
+
static void process_deferred_actions(struct datapath *dp)
{
struct action_fifo *fifo = this_cpu_ptr(action_fifos);
@@ -1278,10 +1309,10 @@ static void process_deferred_actions(struct datapath *dp)
struct sk_buff *skb = da->skb;
struct sw_flow_key *key = &da->pkt_key;
const struct nlattr *actions = da->actions;
+ int actions_len = da->actions_len;
if (actions)
- do_execute_actions(dp, skb, key, actions,
- nla_len(actions));
+ do_execute_actions(dp, skb, key, actions, actions_len);
else
ovs_dp_process_packet(skb, key);
} while (!action_fifo_is_empty(fifo));
@@ -1323,8 +1354,8 @@ int action_fifos_init(void)
if (!action_fifos)
return -ENOMEM;
- recirc_keys = alloc_percpu(struct recirc_keys);
- if (!recirc_keys) {
+ flow_keys = alloc_percpu(struct action_flow_keys);
+ if (!flow_keys) {
free_percpu(action_fifos);
return -ENOMEM;
}
@@ -1335,5 +1366,5 @@ int action_fifos_init(void)
void action_fifos_exit(void)
{
free_percpu(action_fifos);
- free_percpu(recirc_keys);
+ free_percpu(flow_keys);
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 7b2c2fce408a..4f7c3b5c080b 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -66,7 +66,9 @@ struct ovs_conntrack_info {
u8 commit : 1;
u8 nat : 3; /* enum ovs_ct_nat */
u8 force : 1;
+ u8 have_eventmask : 1;
u16 family;
+ u32 eventmask; /* Mask of 1 << IPCT_*. */
struct md_mark mark;
struct md_labels labels;
#ifdef CONFIG_NF_NAT_NEEDED
@@ -373,7 +375,7 @@ static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key,
}
/* Labels are included in the IPCTNL_MSG_CT_NEW event only if the
- * IPCT_LABEL bit it set in the event cache.
+ * IPCT_LABEL bit is set in the event cache.
*/
nf_conntrack_event_cache(IPCT_LABEL, ct);
@@ -1007,6 +1009,20 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
if (!ct)
return 0;
+ /* Set the conntrack event mask if given. NEW and DELETE events have
+ * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener
+ * typically would receive many kinds of updates. Setting the event
+ * mask allows those events to be filtered. The set event mask will
+ * remain in effect for the lifetime of the connection unless changed
+ * by a further CT action with both the commit flag and the eventmask
+ * option. */
+ if (info->have_eventmask) {
+ struct nf_conntrack_ecache *cache = nf_ct_ecache_find(ct);
+
+ if (cache)
+ cache->ctmask = info->eventmask;
+ }
+
/* Apply changes before confirming the connection so that the initial
* conntrack NEW netlink event carries the values given in the CT
* action.
@@ -1238,6 +1254,8 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
/* NAT length is checked when parsing the nested attributes. */
[OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX },
#endif
+ [OVS_CT_ATTR_EVENTMASK] = { .minlen = sizeof(u32),
+ .maxlen = sizeof(u32) },
};
static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
@@ -1316,6 +1334,11 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
break;
}
#endif
+ case OVS_CT_ATTR_EVENTMASK:
+ info->have_eventmask = true;
+ info->eventmask = nla_get_u32(a);
+ break;
+
default:
OVS_NLERR(log, "Unknown conntrack attr (%d)",
type);
@@ -1515,6 +1538,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
ct_info->helper->name))
return -EMSGSIZE;
}
+ if (ct_info->have_eventmask &&
+ nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask))
+ return -EMSGSIZE;
+
#ifdef CONFIG_NF_NAT_NEEDED
if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
return -EMSGSIZE;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9c62b6325f7a..7b17da9a94a0 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1353,7 +1353,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
int err;
err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
- OVS_FLOW_ATTR_MAX, flow_policy);
+ OVS_FLOW_ATTR_MAX, flow_policy, NULL);
if (err)
return err;
ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 1c6e9377436d..da931bdef8a7 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -34,8 +34,6 @@
#define DP_MAX_PORTS USHRT_MAX
#define DP_VPORT_HASH_BUCKETS 1024
-#define SAMPLE_ACTION_DEPTH 3
-
/**
* struct dp_stats_percpu - per-cpu packet processing statistics for a given
* datapath.
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 1105a838bab8..7e1d8a2afa63 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2014 Nicira, Inc.
+ * Copyright (c) 2007-2017 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -59,6 +59,39 @@ struct ovs_len_tbl {
#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2
+static bool actions_may_change_flow(const struct nlattr *actions)
+{
+ struct nlattr *nla;
+ int rem;
+
+ nla_for_each_nested(nla, actions, rem) {
+ u16 action = nla_type(nla);
+
+ switch (action) {
+ case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_RECIRC:
+ case OVS_ACTION_ATTR_TRUNC:
+ case OVS_ACTION_ATTR_USERSPACE:
+ break;
+
+ case OVS_ACTION_ATTR_CT:
+ case OVS_ACTION_ATTR_HASH:
+ case OVS_ACTION_ATTR_POP_ETH:
+ case OVS_ACTION_ATTR_POP_MPLS:
+ case OVS_ACTION_ATTR_POP_VLAN:
+ case OVS_ACTION_ATTR_PUSH_ETH:
+ case OVS_ACTION_ATTR_PUSH_MPLS:
+ case OVS_ACTION_ATTR_PUSH_VLAN:
+ case OVS_ACTION_ATTR_SAMPLE:
+ case OVS_ACTION_ATTR_SET:
+ case OVS_ACTION_ATTR_SET_MASKED:
+ default:
+ return true;
+ }
+ }
+ return false;
+}
+
static void update_range(struct sw_flow_match *match,
size_t offset, size_t size, bool is_mask)
{
@@ -2023,18 +2056,20 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
- int depth, struct sw_flow_actions **sfa,
+ struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log);
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
- const struct sw_flow_key *key, int depth,
+ const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci, bool log)
+ __be16 eth_type, __be16 vlan_tci,
+ bool log, bool last)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
const struct nlattr *a;
- int rem, start, err, st_acts;
+ int rem, start, err;
+ struct sample_arg arg;
memset(attrs, 0, sizeof(attrs));
nla_for_each_nested(a, attr, rem) {
@@ -2058,20 +2093,32 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
if (start < 0)
return start;
- err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
- nla_data(probability), sizeof(u32), log);
+
+ /* When both skb and flow may be changed, put the sample
+ * into a deferred fifo. On the other hand, if only skb
+ * may be modified, the actions can be executed in place.
+ *
+ * Do this analysis at the flow installation time.
+ * Set 'clone_action->exec' to true if the actions can be
+ * executed without being deferred.
+ *
+ * If the sample is the last action, it can always be excuted
+ * rather than deferred.
+ */
+ arg.exec = last || !actions_may_change_flow(actions);
+ arg.probability = nla_get_u32(probability);
+
+ err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
+ log);
if (err)
return err;
- st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
- if (st_acts < 0)
- return st_acts;
- err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa,
+ err = __ovs_nla_copy_actions(net, actions, key, sfa,
eth_type, vlan_tci, log);
+
if (err)
return err;
- add_nested_action_end(*sfa, st_acts);
add_nested_action_end(*sfa, start);
return 0;
@@ -2380,8 +2427,8 @@ static int validate_userspace(const struct nlattr *attr)
struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
int error;
- error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
- attr, userspace_policy);
+ error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr,
+ userspace_policy, NULL);
if (error)
return error;
@@ -2408,16 +2455,13 @@ static int copy_action(const struct nlattr *from,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
- int depth, struct sw_flow_actions **sfa,
+ struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
int rem, err;
- if (depth >= SAMPLE_ACTION_DEPTH)
- return -EOVERFLOW;
-
nla_for_each_nested(a, attr, rem) {
/* Expected argument lengths, (u32)-1 for variable length. */
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
@@ -2555,13 +2599,17 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return err;
break;
- case OVS_ACTION_ATTR_SAMPLE:
- err = validate_and_copy_sample(net, a, key, depth, sfa,
- eth_type, vlan_tci, log);
+ case OVS_ACTION_ATTR_SAMPLE: {
+ bool last = nla_is_last(a, rem);
+
+ err = validate_and_copy_sample(net, a, key, sfa,
+ eth_type, vlan_tci,
+ log, last);
if (err)
return err;
skip_copy = true;
break;
+ }
case OVS_ACTION_ATTR_CT:
err = ovs_ct_copy_action(net, a, key, sfa, log);
@@ -2615,7 +2663,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return PTR_ERR(*sfa);
(*sfa)->orig_len = nla_len(attr);
- err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
+ err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
key->eth.vlan.tci, log);
if (err)
ovs_nla_free_flow_actions(*sfa);
@@ -2623,39 +2671,44 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return err;
}
-static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
+static int sample_action_to_attr(const struct nlattr *attr,
+ struct sk_buff *skb)
{
- const struct nlattr *a;
- struct nlattr *start;
- int err = 0, rem;
+ struct nlattr *start, *ac_start = NULL, *sample_arg;
+ int err = 0, rem = nla_len(attr);
+ const struct sample_arg *arg;
+ struct nlattr *actions;
start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
if (!start)
return -EMSGSIZE;
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
- struct nlattr *st_sample;
+ sample_arg = nla_data(attr);
+ arg = nla_data(sample_arg);
+ actions = nla_next(sample_arg, &rem);
- switch (type) {
- case OVS_SAMPLE_ATTR_PROBABILITY:
- if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
- sizeof(u32), nla_data(a)))
- return -EMSGSIZE;
- break;
- case OVS_SAMPLE_ATTR_ACTIONS:
- st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
- if (!st_sample)
- return -EMSGSIZE;
- err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
- if (err)
- return err;
- nla_nest_end(skb, st_sample);
- break;
- }
+ if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
+ err = -EMSGSIZE;
+ goto out;
+ }
+
+ ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
+ if (!ac_start) {
+ err = -EMSGSIZE;
+ goto out;
+ }
+
+ err = ovs_nla_put_actions(actions, rem, skb);
+
+out:
+ if (err) {
+ nla_nest_cancel(skb, ac_start);
+ nla_nest_cancel(skb, start);
+ } else {
+ nla_nest_end(skb, ac_start);
+ nla_nest_end(skb, start);
}
- nla_nest_end(skb, start);
return err;
}
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 7eb955e453e6..869acb3b3d3f 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -70,7 +70,8 @@ static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,
if (nla_len(attr) < sizeof(struct nlattr))
return -EINVAL;
- err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy);
+ err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy,
+ NULL);
if (err < 0)
return err;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ea81ccf3c7d6..f4001763134d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1496,6 +1496,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
DEFINE_MUTEX(fanout_mutex);
EXPORT_SYMBOL_GPL(fanout_mutex);
static LIST_HEAD(fanout_list);
+static u16 fanout_next_id;
static void __fanout_link(struct sock *sk, struct packet_sock *po)
{
@@ -1629,6 +1630,36 @@ static void fanout_release_data(struct packet_fanout *f)
};
}
+static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
+{
+ struct packet_fanout *f;
+
+ list_for_each_entry(f, &fanout_list, list) {
+ if (f->id == candidate_id &&
+ read_pnet(&f->net) == sock_net(sk)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool fanout_find_new_id(struct sock *sk, u16 *new_id)
+{
+ u16 id = fanout_next_id;
+
+ do {
+ if (__fanout_id_is_free(sk, id)) {
+ *new_id = id;
+ fanout_next_id = id + 1;
+ return true;
+ }
+
+ id++;
+ } while (id != fanout_next_id);
+
+ return false;
+}
+
static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{
struct packet_rollover *rollover = NULL;
@@ -1676,6 +1707,19 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
po->rollover = rollover;
}
+ if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
+ if (id != 0) {
+ err = -EINVAL;
+ goto out;
+ }
+ if (!fanout_find_new_id(sk, &id)) {
+ err = -ENOMEM;
+ goto out;
+ }
+ /* ephemeral flag for the first socket in the group: drop it */
+ flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8);
+ }
+
match = NULL;
list_for_each_entry(f, &fanout_list, list) {
if (f->id == id &&
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index bc5ee5fbe6ae..45b3af3080d8 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -61,7 +61,8 @@ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = {
[IFA_LOCAL] = { .type = NLA_U8 },
};
-static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
@@ -78,7 +79,8 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy);
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy,
+ extack);
if (err < 0)
return err;
@@ -226,7 +228,8 @@ static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = {
[RTA_OIF] = { .type = NLA_U32 },
};
-static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[RTA_MAX+1];
@@ -243,7 +246,8 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy,
+ extack);
if (err < 0)
return err;
diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
index b83c6807a5ae..326fd97444f5 100644
--- a/net/qrtr/Kconfig
+++ b/net/qrtr/Kconfig
@@ -16,7 +16,7 @@ if QRTR
config QRTR_SMD
tristate "SMD IPC Router channels"
- depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+ depends on RPMSG || (COMPILE_TEST && RPMSG=n)
---help---
Say Y here to support SMD based ipcrouter channels. SMD is the
most common transport for IPC Router.
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 9da7368b0140..a9a8c7d5a4a9 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -945,7 +945,8 @@ static const struct nla_policy qrtr_policy[IFA_MAX + 1] = {
[IFA_LOCAL] = { .type = NLA_U32 },
};
-static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[IFA_MAX + 1];
struct ifaddrmsg *ifm;
@@ -959,7 +960,7 @@ static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
ASSERT_RTNL();
- rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy);
+ rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy, extack);
if (rc < 0)
return rc;
diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c
index 0d11132b3370..50615d5efac1 100644
--- a/net/qrtr/smd.c
+++ b/net/qrtr/smd.c
@@ -14,21 +14,21 @@
#include <linux/module.h>
#include <linux/skbuff.h>
-#include <linux/soc/qcom/smd.h>
+#include <linux/rpmsg.h>
#include "qrtr.h"
struct qrtr_smd_dev {
struct qrtr_endpoint ep;
- struct qcom_smd_channel *channel;
+ struct rpmsg_endpoint *channel;
struct device *dev;
};
/* from smd to qrtr */
-static int qcom_smd_qrtr_callback(struct qcom_smd_channel *channel,
- const void *data, size_t len)
+static int qcom_smd_qrtr_callback(struct rpmsg_device *rpdev,
+ void *data, int len, void *priv, u32 addr)
{
- struct qrtr_smd_dev *qdev = qcom_smd_get_drvdata(channel);
+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev);
int rc;
if (!qdev)
@@ -54,7 +54,7 @@ static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
if (rc)
goto out;
- rc = qcom_smd_send(qdev->channel, skb->data, skb->len);
+ rc = rpmsg_send(qdev->channel, skb->data, skb->len);
out:
if (rc)
@@ -64,57 +64,55 @@ out:
return rc;
}
-static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev)
+static int qcom_smd_qrtr_probe(struct rpmsg_device *rpdev)
{
struct qrtr_smd_dev *qdev;
int rc;
- qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL);
+ qdev = devm_kzalloc(&rpdev->dev, sizeof(*qdev), GFP_KERNEL);
if (!qdev)
return -ENOMEM;
- qdev->channel = sdev->channel;
- qdev->dev = &sdev->dev;
+ qdev->channel = rpdev->ept;
+ qdev->dev = &rpdev->dev;
qdev->ep.xmit = qcom_smd_qrtr_send;
rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO);
if (rc)
return rc;
- qcom_smd_set_drvdata(sdev->channel, qdev);
- dev_set_drvdata(&sdev->dev, qdev);
+ dev_set_drvdata(&rpdev->dev, qdev);
- dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n");
+ dev_dbg(&rpdev->dev, "Qualcomm SMD QRTR driver probed\n");
return 0;
}
-static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev)
+static void qcom_smd_qrtr_remove(struct rpmsg_device *rpdev)
{
- struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev);
+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev);
qrtr_endpoint_unregister(&qdev->ep);
- dev_set_drvdata(&sdev->dev, NULL);
+ dev_set_drvdata(&rpdev->dev, NULL);
}
-static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = {
+static const struct rpmsg_device_id qcom_smd_qrtr_smd_match[] = {
{ "IPCRTR" },
{}
};
-static struct qcom_smd_driver qcom_smd_qrtr_driver = {
+static struct rpmsg_driver qcom_smd_qrtr_driver = {
.probe = qcom_smd_qrtr_probe,
.remove = qcom_smd_qrtr_remove,
.callback = qcom_smd_qrtr_callback,
- .smd_match_table = qcom_smd_qrtr_smd_match,
- .driver = {
+ .id_table = qcom_smd_qrtr_smd_match,
+ .drv = {
.name = "qcom_smd_qrtr",
- .owner = THIS_MODULE,
},
};
-module_qcom_smd_driver(qcom_smd_qrtr_driver);
+module_rpmsg_driver(qcom_smd_qrtr_driver);
MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver");
MODULE_LICENSE("GPL v2");
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 1fa75ab7b733..6a5ebdea7d2e 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -333,11 +333,19 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
rds_conn_path_reset(cp);
if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING,
+ RDS_CONN_DOWN) &&
+ !rds_conn_path_transition(cp, RDS_CONN_ERROR,
RDS_CONN_DOWN)) {
/* This can happen - eg when we're in the middle of tearing
* down the connection, and someone unloads the rds module.
- * Quite reproduceable with loopback connections.
+ * Quite reproducible with loopback connections.
* Mostly harmless.
+ *
+ * Note that this also happens with rds-tcp because
+ * we could have triggered rds_conn_path_drop in irq
+ * mode from rds_tcp_state change on the receipt of
+ * a FIN, thus we need to recheck for RDS_CONN_ERROR
+ * here.
*/
rds_conn_path_error(cp, "%s: failed to transition "
"to state DOWN, current state "
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 1c38d2c7caa8..80fb6f63e768 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -702,9 +702,8 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
event->param.conn.initiator_depth);
/* rdma_accept() calls rdma_reject() internally if it fails */
- err = rdma_accept(cm_id, &conn_param);
- if (err)
- rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
+ if (rdma_accept(cm_id, &conn_param))
+ rds_ib_conn_error(conn, "rdma_accept failed\n");
out:
if (conn)
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index 4fe8f4fec4ee..86ef907067bb 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -78,17 +78,15 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
return ibmr;
out_no_cigar:
- if (ibmr) {
- if (fmr->fmr)
- ib_dealloc_fmr(fmr->fmr);
- kfree(ibmr);
- }
+ kfree(ibmr);
atomic_dec(&pool->item_count);
+
return ERR_PTR(err);
}
-int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
- struct scatterlist *sg, unsigned int nents)
+static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
+ struct rds_ib_mr *ibmr, struct scatterlist *sg,
+ unsigned int nents)
{
struct ib_device *dev = rds_ibdev->dev;
struct rds_ib_fmr *fmr = &ibmr->u.fmr;
@@ -114,29 +112,39 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
if (dma_addr & ~PAGE_MASK) {
- if (i > 0)
+ if (i > 0) {
+ ib_dma_unmap_sg(dev, sg, nents,
+ DMA_BIDIRECTIONAL);
return -EINVAL;
- else
+ } else {
++page_cnt;
+ }
}
if ((dma_addr + dma_len) & ~PAGE_MASK) {
- if (i < sg_dma_len - 1)
+ if (i < sg_dma_len - 1) {
+ ib_dma_unmap_sg(dev, sg, nents,
+ DMA_BIDIRECTIONAL);
return -EINVAL;
- else
+ } else {
++page_cnt;
+ }
}
len += dma_len;
}
page_cnt += len >> PAGE_SHIFT;
- if (page_cnt > ibmr->pool->fmr_attr.max_pages)
+ if (page_cnt > ibmr->pool->fmr_attr.max_pages) {
+ ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
return -EINVAL;
+ }
dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
rdsibdev_to_node(rds_ibdev));
- if (!dma_pages)
+ if (!dma_pages) {
+ ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
return -ENOMEM;
+ }
page_cnt = 0;
for (i = 0; i < sg_dma_len; ++i) {
@@ -149,8 +157,10 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
}
ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr);
- if (ret)
+ if (ret) {
+ ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
goto out;
+ }
/* Success - we successfully remapped the MR, so we can
* safely tear down the old mapping.
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 5d6e98a79a5e..0ea4ab017a8c 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -125,8 +125,6 @@ void rds_ib_mr_exit(void);
void __rds_ib_teardown_mr(struct rds_ib_mr *);
void rds_ib_teardown_mr(struct rds_ib_mr *);
struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int);
-int rds_ib_map_fmr(struct rds_ib_device *, struct rds_ib_mr *,
- struct scatterlist *, unsigned int);
struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *);
int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **);
struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *,
diff --git a/net/rds/threads.c b/net/rds/threads.c
index e36e333a0aa0..3e447d056d09 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -156,7 +156,7 @@ void rds_connect_worker(struct work_struct *work)
struct rds_connection *conn = cp->cp_conn;
int ret;
- if (cp->cp_index > 1 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr)
+ if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr)
return;
clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 26a7b1db1361..7486926e60a8 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -740,6 +740,25 @@ static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
}
/*
+ * Abort a call due to a protocol error.
+ */
+static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
+ struct sk_buff *skb,
+ const char *eproto_why,
+ const char *why,
+ u32 abort_code)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why);
+ return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO);
+}
+
+#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \
+ __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \
+ (abort_why), (abort_code))
+
+/*
* conn_client.c
*/
extern unsigned int rxrpc_max_client_connections;
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 0ed181f53f32..1752fcf8e8f1 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -413,11 +413,11 @@ found_service:
case RXRPC_CONN_REMOTELY_ABORTED:
rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- conn->remote_abort, ECONNABORTED);
+ conn->remote_abort, -ECONNABORTED);
break;
case RXRPC_CONN_LOCALLY_ABORTED:
rxrpc_abort_call("CON", call, sp->hdr.seq,
- conn->local_abort, ECONNABORTED);
+ conn->local_abort, -ECONNABORTED);
break;
default:
BUG();
@@ -600,7 +600,7 @@ int rxrpc_reject_call(struct rxrpc_sock *rx)
write_lock_bh(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_SERVER_ACCEPTING:
- __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED);
+ __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, -ECONNABORTED);
abort = true;
/* fall through */
case RXRPC_CALL_COMPLETE:
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 97a17ada4431..7a77844aab16 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -386,7 +386,7 @@ recheck_state:
now = ktime_get_real();
if (ktime_before(call->expire_at, now)) {
- rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME);
+ rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index d79cd36987a9..47f7f4205653 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -486,7 +486,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
call = list_entry(rx->to_be_accepted.next,
struct rxrpc_call, accept_link);
list_del(&call->accept_link);
- rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET);
+ rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET);
rxrpc_put_call(call, rxrpc_call_put);
}
@@ -494,7 +494,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
call = list_entry(rx->sock_calls.next,
struct rxrpc_call, sock_link);
rxrpc_get_call(call, rxrpc_call_got);
- rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET);
+ rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET);
rxrpc_send_abort_packet(call);
rxrpc_release_call(rx, call);
rxrpc_put_call(call, rxrpc_call_put);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index c3be03e8d098..e8dea0d49e7f 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -550,6 +550,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
call->cid = conn->proto.cid | channel;
call->call_id = call_id;
+ trace_rxrpc_connect_call(call);
_net("CONNECT call %08x:%08x as call %d on conn %d",
call->cid, call->call_id, call->debug_id, conn->debug_id);
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index b099b64366f3..46babcf82ce8 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -168,7 +168,7 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn,
* generate a connection-level abort
*/
static int rxrpc_abort_connection(struct rxrpc_connection *conn,
- u32 error, u32 abort_code)
+ int error, u32 abort_code)
{
struct rxrpc_wire_header whdr;
struct msghdr msg;
@@ -281,14 +281,17 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
case RXRPC_PACKET_TYPE_ABORT:
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- &wtmp, sizeof(wtmp)) < 0)
+ &wtmp, sizeof(wtmp)) < 0) {
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_abort"));
return -EPROTO;
+ }
abort_code = ntohl(wtmp);
_proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);
conn->state = RXRPC_CONN_REMOTELY_ABORTED;
rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
- abort_code, ECONNABORTED);
+ abort_code, -ECONNABORTED);
return -ECONNABORTED;
case RXRPC_PACKET_TYPE_CHALLENGE:
@@ -327,7 +330,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
return 0;
default:
- _leave(" = -EPROTO [%u]", sp->hdr.type);
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_conn_pkt"));
return -EPROTO;
}
}
@@ -370,7 +374,7 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn)
abort:
_debug("abort %d, %d", ret, abort_code);
- rxrpc_abort_connection(conn, -ret, abort_code);
+ rxrpc_abort_connection(conn, ret, abort_code);
_leave(" [aborted]");
}
@@ -419,9 +423,8 @@ requeue_and_leave:
goto out;
protocol_error:
- if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
+ if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
goto requeue_and_leave;
rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
- _leave(" [EPROTO]");
goto out;
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 18b2ad8be8e2..45dba732a3b4 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -30,7 +30,7 @@
static void rxrpc_proto_abort(const char *why,
struct rxrpc_call *call, rxrpc_seq_t seq)
{
- if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) {
+ if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) {
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
rxrpc_queue_call(call);
}
@@ -665,6 +665,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
rwind = RXRPC_RXTX_BUFF_SIZE - 1;
if (rwind > call->tx_winsize)
wake = true;
+ trace_rxrpc_rx_rwind_change(call, sp->hdr.serial,
+ ntohl(ackinfo->rwind), wake);
call->tx_winsize = rwind;
}
@@ -877,7 +879,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
}
/*
- * Process an ABORT packet.
+ * Process an ABORT packet directed at a call.
*/
static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
{
@@ -892,10 +894,12 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
&wtmp, sizeof(wtmp)) >= 0)
abort_code = ntohl(wtmp);
+ trace_rxrpc_rx_abort(call, sp->hdr.serial, abort_code);
+
_proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- abort_code, ECONNABORTED))
+ abort_code, -ECONNABORTED))
rxrpc_notify_socket(call);
}
@@ -958,7 +962,7 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
case RXRPC_CALL_COMPLETE:
break;
default:
- if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, ESHUTDOWN)) {
+ if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) {
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
rxrpc_queue_call(call);
}
@@ -1017,8 +1021,11 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
struct rxrpc_wire_header whdr;
/* dig out the RxRPC connection details */
- if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
+ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) {
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_hdr"));
return -EBADMSG;
+ }
memset(sp, 0, sizeof(*sp));
sp->hdr.epoch = ntohl(whdr.epoch);
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 7d4375e557e6..af276f173b10 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -46,7 +46,10 @@ static int none_respond_to_challenge(struct rxrpc_connection *conn,
struct sk_buff *skb,
u32 *_abort_code)
{
- *_abort_code = RX_PROTOCOL_ERROR;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("chall_none"));
return -EPROTO;
}
@@ -54,7 +57,10 @@ static int none_verify_response(struct rxrpc_connection *conn,
struct sk_buff *skb,
u32 *_abort_code)
{
- *_abort_code = RX_PROTOCOL_ERROR;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("resp_none"));
return -EPROTO;
}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index bf13b8470c9a..1ed9c0c2e94f 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -296,7 +296,7 @@ void rxrpc_peer_error_distributor(struct work_struct *work)
hlist_del_init(&call->error_link);
rxrpc_see_call(call);
- if (rxrpc_set_call_completion(call, compl, 0, error))
+ if (rxrpc_set_call_completion(call, compl, 0, -error))
rxrpc_notify_socket(call);
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 3e2f1a8e9c5b..f9caf3b77509 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -83,11 +83,11 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
break;
case RXRPC_CALL_NETWORK_ERROR:
- tmp = call->error;
+ tmp = -call->error;
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp);
break;
case RXRPC_CALL_LOCAL_ERROR:
- tmp = call->error;
+ tmp = -call->error;
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp);
break;
default:
@@ -682,14 +682,16 @@ out:
return ret;
short_data:
+ trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data"));
ret = -EBADMSG;
goto out;
excess_data:
+ trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data"));
ret = -EMSGSIZE;
goto out;
call_complete:
*_abort = call->abort_code;
- ret = -call->error;
+ ret = call->error;
if (call->completion == RXRPC_CALL_SUCCEEDED) {
ret = 1;
if (size > 0)
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 4374e7b9c7bf..1bb9b2ccc267 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -148,15 +148,13 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
u32 data_size,
void *sechdr)
{
- struct rxrpc_skb_priv *sp;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxkad_level1_hdr hdr;
struct rxrpc_crypt iv;
struct scatterlist sg;
u16 check;
- sp = rxrpc_skb(skb);
-
_enter("");
check = sp->hdr.seq ^ call->call_id;
@@ -323,6 +321,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_crypt iv;
struct scatterlist sg[16];
struct sk_buff *trailer;
+ bool aborted;
u32 data_size, buf;
u16 check;
int nsg;
@@ -330,7 +329,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
_enter("");
if (len < 8) {
- rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H",
+ RXKADSEALEDINCON);
goto protocol_error;
}
@@ -355,7 +355,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
/* Extract the decrypted packet length */
if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
- rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1",
+ RXKADDATALEN);
goto protocol_error;
}
offset += sizeof(sechdr);
@@ -368,12 +369,14 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
check ^= seq ^ call->call_id;
check &= 0xffff;
if (check != 0) {
- rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C",
+ RXKADSEALEDINCON);
goto protocol_error;
}
if (data_size > len) {
- rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L",
+ RXKADDATALEN);
goto protocol_error;
}
@@ -381,8 +384,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
return 0;
protocol_error:
- rxrpc_send_abort_packet(call);
- _leave(" = -EPROTO");
+ if (aborted)
+ rxrpc_send_abort_packet(call);
return -EPROTO;
nomem:
@@ -403,6 +406,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_crypt iv;
struct scatterlist _sg[4], *sg;
struct sk_buff *trailer;
+ bool aborted;
u32 data_size, buf;
u16 check;
int nsg;
@@ -410,7 +414,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
_enter(",{%d}", skb->len);
if (len < 8) {
- rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H",
+ RXKADSEALEDINCON);
goto protocol_error;
}
@@ -445,7 +450,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
/* Extract the decrypted packet length */
if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
- rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2",
+ RXKADDATALEN);
goto protocol_error;
}
offset += sizeof(sechdr);
@@ -458,12 +464,14 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
check ^= seq ^ call->call_id;
check &= 0xffff;
if (check != 0) {
- rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C",
+ RXKADSEALEDINCON);
goto protocol_error;
}
if (data_size > len) {
- rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L",
+ RXKADDATALEN);
goto protocol_error;
}
@@ -471,8 +479,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
return 0;
protocol_error:
- rxrpc_send_abort_packet(call);
- _leave(" = -EPROTO");
+ if (aborted)
+ rxrpc_send_abort_packet(call);
return -EPROTO;
nomem:
@@ -491,6 +499,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist sg;
+ bool aborted;
u16 cksum;
u32 x, y;
@@ -522,10 +531,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
cksum = 1; /* zero checksums are not permitted */
if (cksum != expected_cksum) {
- rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO);
- rxrpc_send_abort_packet(call);
- _leave(" = -EPROTO [csum failed]");
- return -EPROTO;
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK",
+ RXKADSEALEDINCON);
+ goto protocol_error;
}
switch (call->conn->params.security_level) {
@@ -538,6 +546,11 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
default:
return -ENOANO;
}
+
+protocol_error:
+ if (aborted)
+ rxrpc_send_abort_packet(call);
+ return -EPROTO;
}
/*
@@ -754,22 +767,23 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
struct rxkad_response resp
__attribute__((aligned(8))); /* must be aligned for crypto */
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ const char *eproto;
u32 version, nonce, min_level, abort_code;
int ret;
_enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key));
- if (!conn->params.key) {
- _leave(" = -EPROTO [no key]");
- return -EPROTO;
- }
+ eproto = tracepoint_string("chall_no_key");
+ abort_code = RX_PROTOCOL_ERROR;
+ if (!conn->params.key)
+ goto protocol_error;
+ abort_code = RXKADEXPIRED;
ret = key_validate(conn->params.key);
- if (ret < 0) {
- *_abort_code = RXKADEXPIRED;
- return ret;
- }
+ if (ret < 0)
+ goto other_error;
+ eproto = tracepoint_string("chall_short");
abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
&challenge, sizeof(challenge)) < 0)
@@ -782,13 +796,15 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
_proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
sp->hdr.serial, version, nonce, min_level);
+ eproto = tracepoint_string("chall_ver");
abort_code = RXKADINCONSISTENCY;
if (version != RXKAD_VERSION)
goto protocol_error;
abort_code = RXKADLEVELFAIL;
+ ret = -EACCES;
if (conn->params.security_level < min_level)
- goto protocol_error;
+ goto other_error;
token = conn->params.key->payload.data[0];
@@ -815,28 +831,34 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
return rxkad_send_response(conn, &sp->hdr, &resp, token->kad);
protocol_error:
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
+ ret = -EPROTO;
+other_error:
*_abort_code = abort_code;
- _leave(" = -EPROTO [%d]", abort_code);
- return -EPROTO;
+ return ret;
}
/*
* decrypt the kerberos IV ticket in the response
*/
static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
+ struct sk_buff *skb,
void *ticket, size_t ticket_len,
struct rxrpc_crypt *_session_key,
time_t *_expiry,
u32 *_abort_code)
{
struct skcipher_request *req;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv, key;
struct scatterlist sg[1];
struct in_addr addr;
unsigned int life;
+ const char *eproto;
time_t issue, now;
bool little_endian;
int ret;
+ u32 abort_code;
u8 *p, *q, *name, *end;
_enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key));
@@ -847,11 +869,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
if (ret < 0) {
switch (ret) {
case -EKEYEXPIRED:
- *_abort_code = RXKADEXPIRED;
- goto error;
+ abort_code = RXKADEXPIRED;
+ goto other_error;
default:
- *_abort_code = RXKADNOAUTH;
- goto error;
+ abort_code = RXKADNOAUTH;
+ goto other_error;
}
}
@@ -860,13 +882,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv));
+ ret = -ENOMEM;
req = skcipher_request_alloc(conn->server_key->payload.data[0],
GFP_NOFS);
- if (!req) {
- *_abort_code = RXKADNOAUTH;
- ret = -ENOMEM;
- goto error;
- }
+ if (!req)
+ goto temporary_error;
sg_init_one(&sg[0], ticket, ticket_len);
skcipher_request_set_callback(req, 0, NULL, NULL);
@@ -877,11 +897,12 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
p = ticket;
end = p + ticket_len;
-#define Z(size) \
+#define Z(field) \
({ \
u8 *__str = p; \
+ eproto = tracepoint_string("rxkad_bad_"#field); \
q = memchr(p, 0, end - p); \
- if (!q || q - p > (size)) \
+ if (!q || q - p > (field##_SZ)) \
goto bad_ticket; \
for (; p < q; p++) \
if (!isprint(*p)) \
@@ -896,17 +917,18 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
p++;
/* extract the authentication name */
- name = Z(ANAME_SZ);
+ name = Z(ANAME);
_debug("KIV ANAME: %s", name);
/* extract the principal's instance */
- name = Z(INST_SZ);
+ name = Z(INST);
_debug("KIV INST : %s", name);
/* extract the principal's authentication domain */
- name = Z(REALM_SZ);
+ name = Z(REALM);
_debug("KIV REALM: %s", name);
+ eproto = tracepoint_string("rxkad_bad_len");
if (end - p < 4 + 8 + 4 + 2)
goto bad_ticket;
@@ -941,36 +963,37 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
/* check the ticket is in date */
if (issue > now) {
- *_abort_code = RXKADNOAUTH;
+ abort_code = RXKADNOAUTH;
ret = -EKEYREJECTED;
- goto error;
+ goto other_error;
}
if (issue < now - life) {
- *_abort_code = RXKADEXPIRED;
+ abort_code = RXKADEXPIRED;
ret = -EKEYEXPIRED;
- goto error;
+ goto other_error;
}
*_expiry = issue + life;
/* get the service name */
- name = Z(SNAME_SZ);
+ name = Z(SNAME);
_debug("KIV SNAME: %s", name);
/* get the service instance name */
- name = Z(INST_SZ);
+ name = Z(INST);
_debug("KIV SINST: %s", name);
-
- ret = 0;
-error:
- _leave(" = %d", ret);
- return ret;
+ return 0;
bad_ticket:
- *_abort_code = RXKADBADTICKET;
- ret = -EBADMSG;
- goto error;
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
+ abort_code = RXKADBADTICKET;
+ ret = -EPROTO;
+other_error:
+ *_abort_code = abort_code;
+ return ret;
+temporary_error:
+ return ret;
}
/*
@@ -1020,6 +1043,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
__attribute__((aligned(8))); /* must be aligned for crypto */
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt session_key;
+ const char *eproto;
time_t expiry;
void *ticket;
u32 abort_code, version, kvno, ticket_len, level;
@@ -1028,6 +1052,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
+ eproto = tracepoint_string("rxkad_rsp_short");
abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
&response, sizeof(response)) < 0)
@@ -1041,40 +1066,43 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
sp->hdr.serial, version, kvno, ticket_len);
+ eproto = tracepoint_string("rxkad_rsp_ver");
abort_code = RXKADINCONSISTENCY;
if (version != RXKAD_VERSION)
goto protocol_error;
+ eproto = tracepoint_string("rxkad_rsp_tktlen");
abort_code = RXKADTICKETLEN;
if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
goto protocol_error;
+ eproto = tracepoint_string("rxkad_rsp_unkkey");
abort_code = RXKADUNKNOWNKEY;
if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5)
goto protocol_error;
/* extract the kerberos ticket and decrypt and decode it */
+ ret = -ENOMEM;
ticket = kmalloc(ticket_len, GFP_NOFS);
if (!ticket)
- return -ENOMEM;
+ goto temporary_error;
+ eproto = tracepoint_string("rxkad_tkt_short");
abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
ticket, ticket_len) < 0)
goto protocol_error_free;
- ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key,
- &expiry, &abort_code);
- if (ret < 0) {
- *_abort_code = abort_code;
- kfree(ticket);
- return ret;
- }
+ ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key,
+ &expiry, _abort_code);
+ if (ret < 0)
+ goto temporary_error_free;
/* use the session key from inside the ticket to decrypt the
* response */
rxkad_decrypt_response(conn, &response, &session_key);
+ eproto = tracepoint_string("rxkad_rsp_param");
abort_code = RXKADSEALEDINCON;
if (ntohl(response.encrypted.epoch) != conn->proto.epoch)
goto protocol_error_free;
@@ -1085,6 +1113,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
csum = response.encrypted.checksum;
response.encrypted.checksum = 0;
rxkad_calc_response_checksum(&response);
+ eproto = tracepoint_string("rxkad_rsp_csum");
if (response.encrypted.checksum != csum)
goto protocol_error_free;
@@ -1093,11 +1122,15 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
struct rxrpc_call *call;
u32 call_id = ntohl(response.encrypted.call_id[i]);
+ eproto = tracepoint_string("rxkad_rsp_callid");
if (call_id > INT_MAX)
goto protocol_error_unlock;
+ eproto = tracepoint_string("rxkad_rsp_callctr");
if (call_id < conn->channels[i].call_counter)
goto protocol_error_unlock;
+
+ eproto = tracepoint_string("rxkad_rsp_callst");
if (call_id > conn->channels[i].call_counter) {
call = rcu_dereference_protected(
conn->channels[i].call,
@@ -1109,10 +1142,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
}
spin_unlock(&conn->channel_lock);
+ eproto = tracepoint_string("rxkad_rsp_seq");
abort_code = RXKADOUTOFSEQUENCE;
if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1)
goto protocol_error_free;
+ eproto = tracepoint_string("rxkad_rsp_level");
abort_code = RXKADLEVELFAIL;
level = ntohl(response.encrypted.level);
if (level > RXRPC_SECURITY_ENCRYPT)
@@ -1123,10 +1158,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
* this the connection security can be handled in exactly the same way
* as for a client connection */
ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
- if (ret < 0) {
- kfree(ticket);
- return ret;
- }
+ if (ret < 0)
+ goto temporary_error_free;
kfree(ticket);
_leave(" = 0");
@@ -1137,9 +1170,18 @@ protocol_error_unlock:
protocol_error_free:
kfree(ticket);
protocol_error:
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
*_abort_code = abort_code;
- _leave(" = -EPROTO [%d]", abort_code);
return -EPROTO;
+
+temporary_error_free:
+ kfree(ticket);
+temporary_error:
+ /* Ignore the response packet if we got a temporary error such as
+ * ENOMEM. We just want to send the challenge again. Note that we
+ * also come out this way if the ticket decryption fails.
+ */
+ return ret;
}
/*
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 97ab214ca411..96ffa5d5733b 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -556,7 +556,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = -ESHUTDOWN;
} else if (cmd == RXRPC_CMD_SEND_ABORT) {
ret = 0;
- if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED))
+ if (rxrpc_abort_call("CMD", call, 0, abort_code, -ECONNABORTED))
ret = rxrpc_send_abort_packet(call);
} else if (cmd != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
@@ -623,7 +623,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
read_unlock_bh(&call->state_lock);
break;
default:
- /* Request phase complete for this client call */
+ /* Request phase complete for this client call */
+ trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send"));
ret = -EPROTO;
break;
}
@@ -642,20 +643,24 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data);
* @error: Local error value
* @why: 3-char string indicating why.
*
- * Allow a kernel service to abort a call, if it's still in an abortable state.
+ * Allow a kernel service to abort a call, if it's still in an abortable state
+ * and return true if the call was aborted, false if it was already complete.
*/
-void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
+bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
u32 abort_code, int error, const char *why)
{
+ bool aborted;
+
_enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
mutex_lock(&call->user_mutex);
- if (rxrpc_abort_call(why, call, 0, abort_code, error))
+ aborted = rxrpc_abort_call(why, call, 0, abort_code, error);
+ if (aborted)
rxrpc_send_abort_packet(call);
mutex_unlock(&call->user_mutex);
- _leave("");
+ return aborted;
}
EXPORT_SYMBOL(rxrpc_kernel_abort_call);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 403790cce7d2..9fb84f0de6af 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -352,6 +352,51 @@ config NET_SCH_PLUG
To compile this code as a module, choose M here: the
module will be called sch_plug.
+menuconfig NET_SCH_DEFAULT
+ bool "Allow override default queue discipline"
+ ---help---
+ Support for selection of default queuing discipline.
+
+ Nearly all users can safely say no here, and the default
+ of pfifo_fast will be used. Many distributions already set
+ the default value via /proc/sys/net/core/default_qdisc.
+
+ If unsure, say N.
+
+if NET_SCH_DEFAULT
+
+choice
+ prompt "Default queuing discipline"
+ default DEFAULT_PFIFO_FAST
+ help
+ Select the queueing discipline that will be used by default
+ for all network devices.
+
+ config DEFAULT_FQ
+ bool "Fair Queue" if NET_SCH_FQ
+
+ config DEFAULT_CODEL
+ bool "Controlled Delay" if NET_SCH_CODEL
+
+ config DEFAULT_FQ_CODEL
+ bool "Fair Queue Controlled Delay" if NET_SCH_FQ_CODEL
+
+ config DEFAULT_SFQ
+ bool "Stochastic Fair Queue" if NET_SCH_SFQ
+
+ config DEFAULT_PFIFO_FAST
+ bool "Priority FIFO Fast"
+endchoice
+
+config DEFAULT_NET_SCH
+ string
+ default "pfifo_fast" if DEFAULT_PFIFO_FAST
+ default "fq" if DEFAULT_FQ
+ default "fq_codel" if DEFAULT_FQ_CODEL
+ default "sfq" if DEFAULT_SFQ
+ default "pfifo_fast"
+endif
+
comment "Classification"
config NET_CLS
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e05b924618a0..7f2cd702bb27 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -428,24 +428,49 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
return res;
}
+/*TCA_ACT_MAX_PRIO is 32, there count upto 32 */
+#define TCA_ACT_MAX_PRIO_MASK 0x1FF
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res)
{
int ret = -1, i;
+ u32 jmp_prgcnt = 0;
+ u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */
if (skb_skip_tc_classify(skb))
return TC_ACT_OK;
+restart_act_graph:
for (i = 0; i < nr_actions; i++) {
const struct tc_action *a = actions[i];
+ if (jmp_prgcnt > 0) {
+ jmp_prgcnt -= 1;
+ continue;
+ }
repeat:
ret = a->ops->act(skb, a, res);
if (ret == TC_ACT_REPEAT)
goto repeat; /* we need a ttl - JHS */
+
+ if (ret & TC_ACT_JUMP) {
+ jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK;
+ if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) {
+ /* faulty opcode, stop pipeline */
+ return TC_ACT_OK;
+ } else {
+ jmp_ttl -= 1;
+ if (jmp_ttl > 0)
+ goto restart_act_graph;
+ else /* faulty graph, stop pipeline */
+ return TC_ACT_OK;
+ }
+ }
+
if (ret != TC_ACT_PIPE)
break;
}
+
return ret;
}
EXPORT_SYMBOL(tcf_action_exec);
@@ -558,7 +583,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
int err;
if (name == NULL) {
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
if (err < 0)
goto err_out;
err = -EINVAL;
@@ -663,7 +688,7 @@ int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est,
int err;
int i;
- err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
if (err < 0)
return err;
@@ -795,7 +820,7 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
int index;
int err;
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
if (err < 0)
goto err_out;
@@ -844,7 +869,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
b = skb_tail_pointer(skb);
- err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
if (err < 0)
goto err_out;
@@ -930,7 +955,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
struct tc_action *act;
LIST_HEAD(actions);
- ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
+ ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
if (ret < 0)
return ret;
@@ -1002,7 +1027,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
return tcf_add_notify(net, n, &actions, portid);
}
-static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
+static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_ACT_MAX + 1];
@@ -1013,7 +1039,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
+ ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
+ extack);
if (ret < 0)
return ret;
@@ -1060,19 +1087,20 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
struct nlattr *nla[TCAA_MAX + 1];
struct nlattr *kind;
- if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, NULL) < 0)
+ if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
+ NULL, NULL) < 0)
return NULL;
tb1 = nla[TCA_ACT_TAB];
if (tb1 == NULL)
return NULL;
if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1),
- NLMSG_ALIGN(nla_len(tb1)), NULL) < 0)
+ NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0)
return NULL;
if (tb[1] == NULL)
return NULL;
- if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL) < 0)
+ if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0)
return NULL;
kind = tb2[TCA_ACT_KIND];
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 520baa41cba3..d33947d6e9d0 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -283,7 +283,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (!nla)
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
+ ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy, NULL);
if (ret < 0)
return ret;
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index f9bb43c25697..2155bc6c6a1e 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -109,7 +109,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
if (!nla)
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy);
+ ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy,
+ NULL);
if (ret < 0)
return ret;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index e978ccd4402c..ab6fdbd34db7 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -59,7 +59,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
+ err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy, NULL);
if (err < 0)
return err;
@@ -181,6 +181,9 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl,
struct tcphdr *tcph;
const struct iphdr *iph;
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+ return 1;
+
tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
if (tcph == NULL)
return 0;
@@ -202,6 +205,9 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl,
struct tcphdr *tcph;
const struct ipv6hdr *ip6h;
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+ return 1;
+
tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
if (tcph == NULL)
return 0;
@@ -225,6 +231,9 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl,
const struct iphdr *iph;
u16 ul;
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ return 1;
+
/*
* Support both UDP and UDPLITE checksum algorithms, Don't use
* udph->len to get the real length without any protocol check,
@@ -278,6 +287,9 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl,
const struct ipv6hdr *ip6h;
u16 ul;
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ return 1;
+
/*
* Support both UDP and UDPLITE checksum algorithms, Don't use
* udph->len to get the real length without any protocol check,
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e6c874a2b283..99afe8b1f1fb 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -73,7 +73,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy);
+ err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 71e7ff22f7c9..c5dec308b8b1 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -443,7 +443,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
int ret = 0;
int err;
- err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy);
+ err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy, NULL);
if (err < 0)
return err;
@@ -514,7 +514,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (tb[TCA_IFE_METALST]) {
err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST],
- NULL);
+ NULL, NULL);
if (err) {
metadata_parse_err:
if (exists)
@@ -603,8 +603,8 @@ nla_put_failure:
return -1;
}
-int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
- u16 metaid, u16 mlen, void *mdata)
+static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
+ u16 metaid, u16 mlen, void *mdata)
{
struct tcf_meta_info *e;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 992ef8d624f1..36f0ced9e60c 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -107,7 +107,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy);
+ err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index af49c7dca860..1b5549ababd4 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -87,7 +87,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy);
+ ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL);
if (ret < 0)
return ret;
if (tb[TCA_MIRRED_PARMS] == NULL)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 9b6aec665495..9016ab8a0649 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -50,7 +50,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy);
+ err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index c1310472f620..164b5ac094be 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -72,7 +72,7 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla,
}
err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka,
- pedit_key_ex_policy);
+ pedit_key_ex_policy, NULL);
if (err)
goto err_out;
@@ -147,7 +147,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy);
+ err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0ba91d1ce994..f42008b29311 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -90,7 +90,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy);
+ err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 0b8217b4763f..59d6645a4007 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -50,7 +50,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
if (!nla)
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy);
+ ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy, NULL);
if (ret < 0)
return ret;
if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] ||
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 823a73ad0c60..43605e7ce051 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -94,7 +94,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy);
+ err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 06ccae3c12ee..6b3e65d7de0c 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -82,7 +82,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
if (nla == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy);
+ err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index c736627f8f4a..a73c4bbcada2 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -103,7 +103,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (!nla)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy);
+ err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index e3a58e021198..b9a2f241a5b3 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -89,7 +89,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
if (!nla)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy);
+ err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy,
+ NULL);
if (err < 0)
return err;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 19e0dba305ce..13ba3a89f675 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -121,7 +121,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (!nla)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy);
+ err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 732f7cae459d..22f88b35a546 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -178,14 +178,11 @@ errout:
return ERR_PTR(err);
}
-static bool tcf_proto_destroy(struct tcf_proto *tp, bool force)
+static void tcf_proto_destroy(struct tcf_proto *tp)
{
- if (tp->ops->destroy(tp, force)) {
- module_put(tp->ops->owner);
- kfree_rcu(tp, rcu);
- return true;
- }
- return false;
+ tp->ops->destroy(tp);
+ module_put(tp->ops->owner);
+ kfree_rcu(tp, rcu);
}
void tcf_destroy_chain(struct tcf_proto __rcu **fl)
@@ -194,14 +191,15 @@ void tcf_destroy_chain(struct tcf_proto __rcu **fl)
while ((tp = rtnl_dereference(*fl)) != NULL) {
RCU_INIT_POINTER(*fl, tp->next);
- tcf_proto_destroy(tp, true);
+ tcf_proto_destroy(tp);
}
}
EXPORT_SYMBOL(tcf_destroy_chain);
/* Add/change/delete/get a filter node */
-static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
+static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
@@ -229,7 +227,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
replay:
tp_created = 0;
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
+ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -360,7 +358,7 @@ replay:
RCU_INIT_POINTER(*back, next);
tfilter_notify(net, skb, n, tp, fh,
RTM_DELTFILTER, false);
- tcf_proto_destroy(tp, true);
+ tcf_proto_destroy(tp);
err = 0;
goto errout;
}
@@ -371,24 +369,28 @@ replay:
goto errout;
}
} else {
+ bool last;
+
switch (n->nlmsg_type) {
case RTM_NEWTFILTER:
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
- tcf_proto_destroy(tp, true);
+ tcf_proto_destroy(tp);
err = -EEXIST;
goto errout;
}
break;
case RTM_DELTFILTER:
- err = tp->ops->delete(tp, fh);
+ err = tp->ops->delete(tp, fh, &last);
if (err)
goto errout;
next = rtnl_dereference(tp->next);
tfilter_notify(net, skb, n, tp, t->tcm_handle,
RTM_DELTFILTER, false);
- if (tcf_proto_destroy(tp, false))
+ if (last) {
RCU_INIT_POINTER(*back, next);
+ tcf_proto_destroy(tp);
+ }
goto errout;
case RTM_GETTFILTER:
err = tfilter_notify(net, skb, n, tp, fh,
@@ -410,7 +412,7 @@ replay:
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
} else {
if (tp_created)
- tcf_proto_destroy(tp, true);
+ tcf_proto_destroy(tp);
}
errout:
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 5877f6061b57..c4fd63a068f9 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -93,30 +93,28 @@ static void basic_delete_filter(struct rcu_head *head)
kfree(f);
}
-static bool basic_destroy(struct tcf_proto *tp, bool force)
+static void basic_destroy(struct tcf_proto *tp)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
- if (!force && !list_empty(&head->flist))
- return false;
-
list_for_each_entry_safe(f, n, &head->flist, link) {
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
call_rcu(&f->rcu, basic_delete_filter);
}
kfree_rcu(head, rcu);
- return true;
}
-static int basic_delete(struct tcf_proto *tp, unsigned long arg)
+static int basic_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f = (struct basic_filter *) arg;
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
call_rcu(&f->rcu, basic_delete_filter);
+ *last = list_empty(&head->flist);
return 0;
}
@@ -174,7 +172,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS],
- basic_policy);
+ basic_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 80f688436dd7..5ebeae996e63 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -274,25 +274,24 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
}
-static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
+static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
+
__cls_bpf_delete(tp, (struct cls_bpf_prog *) arg);
+ *last = list_empty(&head->plist);
return 0;
}
-static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
+static void cls_bpf_destroy(struct tcf_proto *tp)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *tmp;
- if (!force && !list_empty(&head->plist))
- return false;
-
list_for_each_entry_safe(prog, tmp, &head->plist, link)
__cls_bpf_delete(tp, prog);
kfree_rcu(head, rcu);
- return true;
}
static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
@@ -478,7 +477,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (tca[TCA_OPTIONS] == NULL)
return -EINVAL;
- ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy);
+ ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy,
+ NULL);
if (ret < 0)
return ret;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index c1f20077837f..12ce547eea04 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -99,7 +99,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
new->handle = handle;
new->tp = tp;
err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
- cgroup_policy);
+ cgroup_policy, NULL);
if (err < 0)
goto errout;
@@ -131,20 +131,16 @@ errout:
return err;
}
-static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force)
+static void cls_cgroup_destroy(struct tcf_proto *tp)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
- if (!force)
- return false;
/* Head can still be NULL due to cls_cgroup_init(). */
if (head)
call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
-
- return true;
}
-static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
+static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
return -EOPNOTSUPP;
}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 3d6b9286c203..3065752b9cda 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -400,7 +400,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
+ err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy, NULL);
if (err < 0)
return err;
@@ -508,9 +508,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
get_random_bytes(&fnew->hashrnd, 4);
}
- fnew->perturb_timer.function = flow_perturbation;
- fnew->perturb_timer.data = (unsigned long)fnew;
- init_timer_deferrable(&fnew->perturb_timer);
+ setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation,
+ (unsigned long)fnew);
tcf_exts_change(tp, &fnew->exts, &e);
tcf_em_tree_change(tp, &fnew->ematches, &t);
@@ -563,12 +562,14 @@ err1:
return err;
}
-static int flow_delete(struct tcf_proto *tp, unsigned long arg)
+static int flow_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
+ struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f = (struct flow_filter *)arg;
list_del_rcu(&f->list);
call_rcu(&f->rcu, flow_destroy_filter);
+ *last = list_empty(&head->filters);
return 0;
}
@@ -584,20 +585,16 @@ static int flow_init(struct tcf_proto *tp)
return 0;
}
-static bool flow_destroy(struct tcf_proto *tp, bool force)
+static void flow_destroy(struct tcf_proto *tp)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
- if (!force && !list_empty(&head->filters))
- return false;
-
list_for_each_entry_safe(f, next, &head->filters, list) {
list_del_rcu(&f->list);
call_rcu(&f->rcu, flow_destroy_filter);
}
kfree_rcu(head, rcu);
- return true;
}
static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9d0c99d2e9fb..3ecf07666df3 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -18,6 +18,7 @@
#include <linux/if_ether.h>
#include <linux/in6.h>
#include <linux/ip.h>
+#include <linux/mpls.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
@@ -47,6 +48,7 @@ struct fl_flow_key {
struct flow_dissector_key_ipv6_addrs enc_ipv6;
};
struct flow_dissector_key_ports enc_tp;
+ struct flow_dissector_key_mpls mpls;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -328,21 +330,16 @@ static void fl_destroy_rcu(struct rcu_head *rcu)
schedule_work(&head->work);
}
-static bool fl_destroy(struct tcf_proto *tp, bool force)
+static void fl_destroy(struct tcf_proto *tp)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f, *next;
- if (!force && !list_empty(&head->filters))
- return false;
-
list_for_each_entry_safe(f, next, &head->filters, list)
__fl_delete(tp, f);
__module_get(THIS_MODULE);
call_rcu(&head->rcu, fl_destroy_rcu);
-
- return true;
}
static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
@@ -423,6 +420,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ARP_SHA_MASK] = { .len = ETH_ALEN },
[TCA_FLOWER_KEY_ARP_THA] = { .len = ETH_ALEN },
[TCA_FLOWER_KEY_ARP_THA_MASK] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_MPLS_TTL] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_BOS] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_TC] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_LABEL] = { .type = NLA_U32 },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -438,6 +439,31 @@ static void fl_set_key_val(struct nlattr **tb,
memcpy(mask, nla_data(tb[mask_type]), len);
}
+static void fl_set_key_mpls(struct nlattr **tb,
+ struct flow_dissector_key_mpls *key_val,
+ struct flow_dissector_key_mpls *key_mask)
+{
+ if (tb[TCA_FLOWER_KEY_MPLS_TTL]) {
+ key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
+ key_mask->mpls_ttl = MPLS_TTL_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_MPLS_BOS]) {
+ key_val->mpls_bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]);
+ key_mask->mpls_bos = MPLS_BOS_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_MPLS_TC]) {
+ key_val->mpls_tc =
+ nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]) & MPLS_TC_MASK;
+ key_mask->mpls_tc = MPLS_TC_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
+ key_val->mpls_label =
+ nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]) &
+ MPLS_LABEL_MASK;
+ key_mask->mpls_label = MPLS_LABEL_MASK;
+ }
+}
+
static void fl_set_key_vlan(struct nlattr **tb,
struct flow_dissector_key_vlan *key_val,
struct flow_dissector_key_vlan *key_mask)
@@ -594,6 +620,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
&mask->icmp.code,
TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
sizeof(key->icmp.code));
+ } else if (key->basic.n_proto == htons(ETH_P_MPLS_UC) ||
+ key->basic.n_proto == htons(ETH_P_MPLS_MC)) {
+ fl_set_key_mpls(tb, &key->mpls, &mask->mpls);
} else if (key->basic.n_proto == htons(ETH_P_ARP) ||
key->basic.n_proto == htons(ETH_P_RARP)) {
fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP,
@@ -730,6 +759,8 @@ static void fl_init_dissector(struct cls_fl_head *head,
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_ARP, arp);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_MPLS, mpls);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_VLAN, vlan);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
@@ -848,7 +879,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (!tb)
return -ENOBUFS;
- err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
+ err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS],
+ fl_policy, NULL);
if (err < 0)
goto errout_tb;
@@ -946,7 +978,7 @@ errout_tb:
return err;
}
-static int fl_delete(struct tcf_proto *tp, unsigned long arg)
+static int fl_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
@@ -955,6 +987,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg)
rhashtable_remove_fast(&head->ht, &f->ht_node,
head->ht_params);
__fl_delete(tp, f);
+ *last = list_empty(&head->filters);
return 0;
}
@@ -994,6 +1027,41 @@ static int fl_dump_key_val(struct sk_buff *skb,
return 0;
}
+static int fl_dump_key_mpls(struct sk_buff *skb,
+ struct flow_dissector_key_mpls *mpls_key,
+ struct flow_dissector_key_mpls *mpls_mask)
+{
+ int err;
+
+ if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask)))
+ return 0;
+ if (mpls_mask->mpls_ttl) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL,
+ mpls_key->mpls_ttl);
+ if (err)
+ return err;
+ }
+ if (mpls_mask->mpls_tc) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC,
+ mpls_key->mpls_tc);
+ if (err)
+ return err;
+ }
+ if (mpls_mask->mpls_label) {
+ err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL,
+ mpls_key->mpls_label);
+ if (err)
+ return err;
+ }
+ if (mpls_mask->mpls_bos) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS,
+ mpls_key->mpls_bos);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static int fl_dump_key_vlan(struct sk_buff *skb,
struct flow_dissector_key_vlan *vlan_key,
struct flow_dissector_key_vlan *vlan_mask)
@@ -1099,6 +1167,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
sizeof(key->basic.n_proto)))
goto nla_put_failure;
+ if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls))
+ goto nla_put_failure;
+
if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
goto nla_put_failure;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 9dc63d54e167..d3885362e017 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -127,20 +127,14 @@ static void fw_delete_filter(struct rcu_head *head)
kfree(f);
}
-static bool fw_destroy(struct tcf_proto *tp, bool force)
+static void fw_destroy(struct tcf_proto *tp)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
int h;
if (head == NULL)
- return true;
-
- if (!force) {
- for (h = 0; h < HTSIZE; h++)
- if (rcu_access_pointer(head->ht[h]))
- return false;
- }
+ return;
for (h = 0; h < HTSIZE; h++) {
while ((f = rtnl_dereference(head->ht[h])) != NULL) {
@@ -150,17 +144,17 @@ static bool fw_destroy(struct tcf_proto *tp, bool force)
call_rcu(&f->rcu, fw_delete_filter);
}
}
- RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
- return true;
}
-static int fw_delete(struct tcf_proto *tp, unsigned long arg)
+static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = (struct fw_filter *)arg;
struct fw_filter __rcu **fp;
struct fw_filter *pfp;
+ int ret = -EINVAL;
+ int h;
if (head == NULL || f == NULL)
goto out;
@@ -173,11 +167,21 @@ static int fw_delete(struct tcf_proto *tp, unsigned long arg)
RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
tcf_unbind_filter(tp, &f->res);
call_rcu(&f->rcu, fw_delete_filter);
- return 0;
+ ret = 0;
+ break;
}
}
+
+ *last = true;
+ for (h = 0; h < HTSIZE; h++) {
+ if (rcu_access_pointer(head->ht[h])) {
+ *last = false;
+ break;
+ }
+ }
+
out:
- return -EINVAL;
+ return ret;
}
static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
@@ -250,7 +254,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (!opt)
return handle ? -EINVAL : 0; /* Succeed if it is old method. */
- err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy);
+ err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 224eb2c14346..2efb36c08f2a 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -90,19 +90,18 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
&offload);
}
-static bool mall_destroy(struct tcf_proto *tp, bool force)
+static void mall_destroy(struct tcf_proto *tp)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct net_device *dev = tp->q->dev_queue->dev;
if (!head)
- return true;
+ return;
if (tc_should_offload(dev, tp, head->flags))
mall_destroy_hw_filter(tp, head, (unsigned long) head);
call_rcu(&head->rcu, mall_destroy_rcu);
- return true;
}
static unsigned long mall_get(struct tcf_proto *tp, u32 handle)
@@ -161,8 +160,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (head)
return -EEXIST;
- err = nla_parse_nested(tb, TCA_MATCHALL_MAX,
- tca[TCA_OPTIONS], mall_policy);
+ err = nla_parse_nested(tb, TCA_MATCHALL_MAX, tca[TCA_OPTIONS],
+ mall_policy, NULL);
if (err < 0)
return err;
@@ -216,7 +215,7 @@ err_exts_init:
return err;
}
-static int mall_delete(struct tcf_proto *tp, unsigned long arg)
+static int mall_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
return -EOPNOTSUPP;
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 455fc8f83d0a..d63d5502ee02 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -140,8 +140,6 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
goto failure;
id = dst->tclassid;
- if (head == NULL)
- goto old_method;
iif = inet_iif(skb);
@@ -194,15 +192,6 @@ restart:
route4_set_fastmap(head, id, iif, ROUTE4_FAILURE);
failure:
return -1;
-
-old_method:
- if (id && (TC_H_MAJ(id) == 0 ||
- !(TC_H_MAJ(id^tp->q->handle)))) {
- res->classid = id;
- res->class = 0;
- return 0;
- }
- return -1;
}
static inline u32 to_hash(u32 id)
@@ -234,9 +223,6 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
struct route4_filter *f;
unsigned int h1, h2;
- if (!head)
- return 0;
-
h1 = to_hash(handle);
if (h1 > 256)
return 0;
@@ -276,20 +262,13 @@ static void route4_delete_filter(struct rcu_head *head)
kfree(f);
}
-static bool route4_destroy(struct tcf_proto *tp, bool force)
+static void route4_destroy(struct tcf_proto *tp)
{
struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
if (head == NULL)
- return true;
-
- if (!force) {
- for (h1 = 0; h1 <= 256; h1++) {
- if (rcu_access_pointer(head->table[h1]))
- return false;
- }
- }
+ return;
for (h1 = 0; h1 <= 256; h1++) {
struct route4_bucket *b;
@@ -312,12 +291,10 @@ static bool route4_destroy(struct tcf_proto *tp, bool force)
kfree_rcu(b, rcu);
}
}
- RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
- return true;
}
-static int route4_delete(struct tcf_proto *tp, unsigned long arg)
+static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter *f = (struct route4_filter *)arg;
@@ -325,7 +302,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
struct route4_filter *nf;
struct route4_bucket *b;
unsigned int h = 0;
- int i;
+ int i, h1;
if (!head || !f)
return -EINVAL;
@@ -356,16 +333,25 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
rt = rtnl_dereference(b->ht[i]);
if (rt)
- return 0;
+ goto out;
}
/* OK, session has no flows */
RCU_INIT_POINTER(head->table[to_hash(h)], NULL);
kfree_rcu(b, rcu);
+ break;
+ }
+ }
- return 0;
+out:
+ *last = true;
+ for (h1 = 0; h1 <= 256; h1++) {
+ if (rcu_access_pointer(head->table[h1])) {
+ *last = false;
+ break;
}
}
+
return 0;
}
@@ -489,7 +475,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (opt == NULL)
return handle ? -EINVAL : 0;
- err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy);
+ err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 322438fb3ffc..0d9d07798699 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -152,8 +152,6 @@ static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
nhptr = ip_hdr(skb);
#endif
- if (unlikely(!head))
- return -1;
restart:
#if RSVP_DST_LEN == 4
@@ -302,22 +300,13 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
call_rcu(&f->rcu, rsvp_delete_filter_rcu);
}
-static bool rsvp_destroy(struct tcf_proto *tp, bool force)
+static void rsvp_destroy(struct tcf_proto *tp)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
if (data == NULL)
- return true;
-
- if (!force) {
- for (h1 = 0; h1 < 256; h1++) {
- if (rcu_access_pointer(data->ht[h1]))
- return false;
- }
- }
-
- RCU_INIT_POINTER(tp->root, NULL);
+ return;
for (h1 = 0; h1 < 256; h1++) {
struct rsvp_session *s;
@@ -337,10 +326,9 @@ static bool rsvp_destroy(struct tcf_proto *tp, bool force)
}
}
kfree_rcu(data, rcu);
- return true;
}
-static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
+static int rsvp_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
@@ -348,7 +336,7 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
unsigned int h = f->handle;
struct rsvp_session __rcu **sp;
struct rsvp_session *nsp, *s = f->sess;
- int i;
+ int i, h1;
fp = &s->ht[(h >> 8) & 0xFF];
for (nfp = rtnl_dereference(*fp); nfp;
@@ -361,7 +349,7 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
for (i = 0; i <= 16; i++)
if (s->ht[i])
- return 0;
+ goto out;
/* OK, session has no flows */
sp = &head->ht[h & 0xFF];
@@ -370,13 +358,23 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
if (nsp == s) {
RCU_INIT_POINTER(*sp, s->next);
kfree_rcu(s, rcu);
- return 0;
+ goto out;
}
}
- return 0;
+ break;
}
}
+
+out:
+ *last = true;
+ for (h1 = 0; h1 < 256; h1++) {
+ if (rcu_access_pointer(head->ht[h1])) {
+ *last = false;
+ break;
+ }
+ }
+
return 0;
}
@@ -484,7 +482,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (opt == NULL)
return handle ? -EINVAL : 0;
- err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
+ err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 0751245a6ace..8a8a58357c39 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -150,7 +150,7 @@ static void tcindex_destroy_fexts(struct rcu_head *head)
kfree(f);
}
-static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
+static int tcindex_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
@@ -186,6 +186,8 @@ found:
call_rcu(&f->rcu, tcindex_destroy_fexts);
else
call_rcu(&r->rcu, tcindex_destroy_rexts);
+
+ *last = false;
return 0;
}
@@ -193,7 +195,9 @@ static int tcindex_destroy_element(struct tcf_proto *tp,
unsigned long arg,
struct tcf_walker *walker)
{
- return tcindex_delete(tp, arg);
+ bool last;
+
+ return tcindex_delete(tp, arg, &last);
}
static void __tcindex_destroy(struct rcu_head *head)
@@ -482,7 +486,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
if (!opt)
return 0;
- err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy);
+ err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy, NULL);
if (err < 0)
return err;
@@ -529,14 +533,11 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-static bool tcindex_destroy(struct tcf_proto *tp, bool force)
+static void tcindex_destroy(struct tcf_proto *tp)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcf_walker walker;
- if (!force)
- return false;
-
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
walker.count = 0;
walker.skip = 0;
@@ -544,7 +545,6 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force)
tcindex_walk(tp, &walker);
call_rcu(&p->rcu, __tcindex_destroy);
- return true;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4dbe0c680fe6..d20e72a095d5 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -585,37 +585,13 @@ static bool ht_empty(struct tc_u_hnode *ht)
return true;
}
-static bool u32_destroy(struct tcf_proto *tp, bool force)
+static void u32_destroy(struct tcf_proto *tp)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
WARN_ON(root_ht == NULL);
- if (!force) {
- if (root_ht) {
- if (root_ht->refcnt > 1)
- return false;
- if (root_ht->refcnt == 1) {
- if (!ht_empty(root_ht))
- return false;
- }
- }
-
- if (tp_c->refcnt > 1)
- return false;
-
- if (tp_c->refcnt == 1) {
- struct tc_u_hnode *ht;
-
- for (ht = rtnl_dereference(tp_c->hlist);
- ht;
- ht = rtnl_dereference(ht->next))
- if (!ht_empty(ht))
- return false;
- }
- }
-
if (root_ht && --root_ht->refcnt == 0)
u32_destroy_hnode(tp, root_ht);
@@ -640,20 +616,22 @@ static bool u32_destroy(struct tcf_proto *tp, bool force)
}
tp->data = NULL;
- return true;
}
-static int u32_delete(struct tcf_proto *tp, unsigned long arg)
+static int u32_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
{
struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
+ struct tc_u_common *tp_c = tp->data;
+ int ret = 0;
if (ht == NULL)
- return 0;
+ goto out;
if (TC_U32_KEY(ht->handle)) {
u32_remove_hw_knode(tp, ht->handle);
- return u32_delete_key(tp, (struct tc_u_knode *)ht);
+ ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
+ goto out;
}
if (root_ht == ht)
@@ -666,7 +644,40 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
return -EBUSY;
}
- return 0;
+out:
+ *last = true;
+ if (root_ht) {
+ if (root_ht->refcnt > 1) {
+ *last = false;
+ goto ret;
+ }
+ if (root_ht->refcnt == 1) {
+ if (!ht_empty(root_ht)) {
+ *last = false;
+ goto ret;
+ }
+ }
+ }
+
+ if (tp_c->refcnt > 1) {
+ *last = false;
+ goto ret;
+ }
+
+ if (tp_c->refcnt == 1) {
+ struct tc_u_hnode *ht;
+
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next))
+ if (!ht_empty(ht)) {
+ *last = false;
+ break;
+ }
+ }
+
+ret:
+ return ret;
}
#define NR_U32_NODE (1<<12)
@@ -860,7 +871,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (opt == NULL)
return handle ? -EINVAL : 0;
- err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy);
+ err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index ae7e4f5b348b..eb0e9bab54c1 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -912,7 +912,7 @@ static int em_meta_change(struct net *net, void *data, int len,
struct tcf_meta_hdr *hdr;
struct meta_match *meta = NULL;
- err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy);
+ err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy, NULL);
if (err < 0)
goto errout;
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index fbb7ebfc58c6..03b677bc0700 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -314,7 +314,7 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
if (!nla)
return 0;
- err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy);
+ err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy, NULL);
if (err < 0)
goto errout;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index bcf49cd22786..bbe57d57b67f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -251,6 +251,15 @@ int qdisc_set_default(const char *name)
return ops ? 0 : -ENOENT;
}
+#ifdef CONFIG_NET_SCH_DEFAULT
+/* Set default value from kernel config */
+static int __init sch_default_qdisc(void)
+{
+ return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
+}
+late_initcall(sch_default_qdisc);
+#endif
+
/* We know handle. Find qdisc among all qdisc's attached to device
* (root qdisc, all its children, children of children etc.)
* Note: caller either uses rtnl or rcu_read_lock()
@@ -274,7 +283,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
return NULL;
}
-void qdisc_hash_add(struct Qdisc *q)
+void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
struct Qdisc *root = qdisc_dev(q)->qdisc;
@@ -282,6 +291,8 @@ void qdisc_hash_add(struct Qdisc *q)
WARN_ON_ONCE(root == &noop_qdisc);
ASSERT_RTNL();
hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
+ if (invisible)
+ q->flags |= TCQ_F_INVISIBLE;
}
}
EXPORT_SYMBOL(qdisc_hash_add);
@@ -455,7 +466,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
u16 *tab = NULL;
int err;
- err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
+ err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
if (err < 0)
return ERR_PTR(err);
if (!tb[TCA_STAB_BASE])
@@ -1003,7 +1014,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
goto err_out4;
}
- qdisc_hash_add(sch);
+ qdisc_hash_add(sch, false);
return sch;
}
@@ -1114,7 +1125,8 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
* Delete/get qdisc.
*/
-static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
+static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct tcmsg *tcm = nlmsg_data(n);
@@ -1129,7 +1141,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -1183,7 +1195,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
* Create/change qdisc.
*/
-static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
+static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct tcmsg *tcm;
@@ -1198,7 +1211,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
replay:
/* Reinit, just in case something touches this. */
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -1401,9 +1414,14 @@ nla_put_failure:
return -1;
}
-static bool tc_qdisc_dump_ignore(struct Qdisc *q)
+static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
- return (q->flags & TCQ_F_BUILTIN) ? true : false;
+ if (q->flags & TCQ_F_BUILTIN)
+ return true;
+ if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
+ return true;
+
+ return false;
}
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
@@ -1417,12 +1435,12 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (old && !tc_qdisc_dump_ignore(old)) {
+ if (old && !tc_qdisc_dump_ignore(old, false)) {
if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
0, RTM_DELQDISC) < 0)
goto err_out;
}
- if (new && !tc_qdisc_dump_ignore(new)) {
+ if (new && !tc_qdisc_dump_ignore(new, false)) {
if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
goto err_out;
@@ -1439,7 +1457,8 @@ err_out:
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
struct netlink_callback *cb,
- int *q_idx_p, int s_q_idx, bool recur)
+ int *q_idx_p, int s_q_idx, bool recur,
+ bool dump_invisible)
{
int ret = 0, q_idx = *q_idx_p;
struct Qdisc *q;
@@ -1452,7 +1471,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
if (q_idx < s_q_idx) {
q_idx++;
} else {
- if (!tc_qdisc_dump_ignore(q) &&
+ if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWQDISC) <= 0)
@@ -1474,7 +1493,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
q_idx++;
continue;
}
- if (!tc_qdisc_dump_ignore(q) &&
+ if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWQDISC) <= 0)
@@ -1496,12 +1515,21 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
int idx, q_idx;
int s_idx, s_q_idx;
struct net_device *dev;
+ const struct nlmsghdr *nlh = cb->nlh;
+ struct tcmsg *tcm = nlmsg_data(nlh);
+ struct nlattr *tca[TCA_MAX + 1];
+ int err;
s_idx = cb->args[0];
s_q_idx = q_idx = cb->args[1];
idx = 0;
ASSERT_RTNL();
+
+ err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+ if (err < 0)
+ return err;
+
for_each_netdev(net, dev) {
struct netdev_queue *dev_queue;
@@ -1512,13 +1540,14 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
q_idx = 0;
if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
- true) < 0)
+ true, tca[TCA_DUMP_INVISIBLE]) < 0)
goto done;
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
- &q_idx, s_q_idx, false) < 0)
+ &q_idx, s_q_idx, false,
+ tca[TCA_DUMP_INVISIBLE]) < 0)
goto done;
cont:
@@ -1540,7 +1569,8 @@ done:
-static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
+static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct tcmsg *tcm = nlmsg_data(n);
@@ -1559,7 +1589,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
if (err < 0)
return err;
@@ -1762,7 +1792,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
{
struct qdisc_dump_args arg;
- if (tc_qdisc_dump_ignore(q) ||
+ if (tc_qdisc_dump_ignore(q, false) ||
*t_p < s_t || !q->ops->cl_ops ||
(tcm->tcm_parent &&
TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 2209c2ddacbf..40cbceed4de8 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -214,7 +214,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
if (opt == NULL)
return -EINVAL;
- error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy);
+ error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy, NULL);
if (error < 0)
return error;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index d6ca18dc04c3..7415859fd4c3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1137,7 +1137,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
struct tc_ratespec *r;
int err;
- err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
+ err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL);
if (err < 0)
return err;
@@ -1161,6 +1161,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
sch->handle);
if (!q->link.q)
q->link.q = &noop_qdisc;
+ else
+ qdisc_hash_add(q->link.q, true);
q->link.priority = TC_CBQ_MAXPRIO - 1;
q->link.priority2 = TC_CBQ_MAXPRIO - 1;
@@ -1472,7 +1474,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
+ err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL);
if (err < 0)
return err;
@@ -1600,6 +1602,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
if (!cl->q)
cl->q = &noop_qdisc;
+ else
+ qdisc_hash_add(cl->q, true);
+
cl->common.classid = classid;
cl->tparent = parent;
cl->qdisc = sch;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 3b86a97bc67c..d00f4c7c2f3a 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -58,7 +58,6 @@ struct choke_sched_data {
/* Variables */
struct red_vars vars;
- struct tcf_proto __rcu *filter_list;
struct {
u32 prob_drop; /* Early probability drops */
u32 prob_mark; /* Early probability marks */
@@ -152,11 +151,6 @@ static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
choke_skb_cb(skb)->classid = classid;
}
-static u16 choke_get_classid(const struct sk_buff *skb)
-{
- return choke_skb_cb(skb)->classid;
-}
-
/*
* Compare flow of two packets
* Returns true only if source and destination address and port match.
@@ -188,40 +182,6 @@ static bool choke_match_flow(struct sk_buff *skb1,
}
/*
- * Classify flow using either:
- * 1. pre-existing classification result in skb
- * 2. fast internal classification
- * 3. use TC filter based classification
- */
-static bool choke_classify(struct sk_buff *skb,
- struct Qdisc *sch, int *qerr)
-
-{
- struct choke_sched_data *q = qdisc_priv(sch);
- struct tcf_result res;
- struct tcf_proto *fl;
- int result;
-
- fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res, false);
- if (result >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
- switch (result) {
- case TC_ACT_STOLEN:
- case TC_ACT_QUEUED:
- *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- case TC_ACT_SHOT:
- return false;
- }
-#endif
- choke_set_classid(skb, TC_H_MIN(res.classid));
- return true;
- }
-
- return false;
-}
-
-/*
* Select a packet at random from queue
* HACK: since queue can have holes from previous deletion; retry several
* times to find a random skb but then just give up and return the head
@@ -257,25 +217,15 @@ static bool choke_match_random(const struct choke_sched_data *q,
return false;
oskb = choke_peek_random(q, pidx);
- if (rcu_access_pointer(q->filter_list))
- return choke_get_classid(nskb) == choke_get_classid(oskb);
-
return choke_match_flow(oskb, nskb);
}
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
- int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
struct choke_sched_data *q = qdisc_priv(sch);
const struct red_parms *p = &q->parms;
- if (rcu_access_pointer(q->filter_list)) {
- /* If using external classifiers, get result and record it. */
- if (!choke_classify(skb, sch, &ret))
- goto other_drop; /* Packet was eaten by filter */
- }
-
choke_skb_cb(skb)->keys_valid = 0;
/* Compute average queue usage (see RED) */
q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
@@ -339,12 +289,6 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
congestion_drop:
qdisc_drop(skb, sch, to_free);
return NET_XMIT_CN;
-
-other_drop:
- if (ret & __NET_XMIT_BYPASS)
- qdisc_qstats_drop(sch);
- __qdisc_drop(skb, to_free);
- return ret;
}
static struct sk_buff *choke_dequeue(struct Qdisc *sch)
@@ -413,7 +357,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
+ err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy, NULL);
if (err < 0)
return err;
@@ -538,7 +482,6 @@ static void choke_destroy(struct Qdisc *sch)
{
struct choke_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
choke_free(q->tab);
}
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 5bfa79ee657c..c518a1efcb9d 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -140,7 +140,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy);
+ err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index bb4cbdf75004..58a8c32eab23 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -76,7 +76,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy);
+ err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, NULL);
if (err < 0)
return err;
@@ -117,6 +117,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
&pfifo_qdisc_ops, classid);
if (cl->qdisc == NULL)
cl->qdisc = &noop_qdisc;
+ else
+ qdisc_hash_add(cl->qdisc, true);
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 5334e309f17f..1c0f877f673a 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -129,7 +129,7 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
if (!opt)
goto errout;
- err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy);
+ err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL);
if (err < 0)
goto errout;
@@ -342,7 +342,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
goto errout;
- err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy);
+ err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL);
if (err < 0)
goto errout;
@@ -374,6 +374,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle);
if (p->q == NULL)
p->q = &noop_qdisc;
+ else
+ qdisc_hash_add(p->q, true);
pr_debug("%s: qdisc %p\n", __func__, p->q);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a4f738ac7728..da4f67bda0ee 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -698,7 +698,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy);
+ err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 9f3a884d1590..18bbb5476c83 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -288,7 +288,6 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
struct fq_codel_flow *flow;
struct list_head *head;
u32 prev_drop_count, prev_ecn_mark;
- unsigned int prev_backlog;
begin:
head = &q->new_flows;
@@ -307,7 +306,6 @@ begin:
prev_drop_count = q->cstats.drop_count;
prev_ecn_mark = q->cstats.ecn_mark;
- prev_backlog = sch->qstats.backlog;
skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams,
&flow->cvars, &q->cstats, qdisc_pkt_len,
@@ -385,7 +383,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy);
+ err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy,
+ NULL);
if (err < 0)
return err;
if (tb[TCA_FQ_CODEL_FLOWS]) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 1a2f9e964330..52a2c55f6d9e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -795,7 +795,7 @@ static void attach_default_qdiscs(struct net_device *dev)
}
#ifdef CONFIG_NET_SCHED
if (dev->qdisc != &noop_qdisc)
- qdisc_hash_add(dev->qdisc);
+ qdisc_hash_add(dev->qdisc, false);
#endif
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index c78a093c551a..17c7130454bd 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -401,7 +401,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
+ err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
if (err < 0)
return err;
@@ -470,7 +470,7 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
+ err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3ffaa6fb0990..5cb82f6c1b06 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -957,7 +957,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy);
+ err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy, NULL);
if (err < 0)
return err;
@@ -1066,6 +1066,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
&pfifo_qdisc_ops, classid);
if (cl->qdisc == NULL)
cl->qdisc = &noop_qdisc;
+ else
+ qdisc_hash_add(cl->qdisc, true);
INIT_LIST_HEAD(&cl->children);
cl->vt_tree = RB_ROOT;
cl->cf_tree = RB_ROOT;
@@ -1425,6 +1427,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
sch->handle);
if (q->root.qdisc == NULL)
q->root.qdisc = &noop_qdisc;
+ else
+ qdisc_hash_add(q->root.qdisc, true);
INIT_LIST_HEAD(&q->root.children);
q->root.vt_tree = RB_ROOT;
q->root.cf_tree = RB_ROOT;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 2fae8b5f1b80..c19d346e6c5a 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -529,7 +529,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy);
+ err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 4cd5fb134bc9..570ef3b0c09b 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1017,7 +1017,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
+ err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL);
if (err < 0)
return err;
@@ -1342,7 +1342,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!opt)
goto failure;
- err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
+ err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL);
if (err < 0)
goto failure;
@@ -1460,6 +1460,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
qdisc_class_hash_insert(&q->clhash, &cl->common);
if (parent)
parent->children++;
+ if (cl->un.leaf.q != &noop_qdisc)
+ qdisc_hash_add(cl->un.leaf.q, true);
} else {
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 20b7f1646f69..cadfdd4f1e52 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -84,7 +84,7 @@ static void mq_attach(struct Qdisc *sch)
qdisc_destroy(old);
#ifdef CONFIG_NET_SCHED
if (ntx < dev->real_num_tx_queues)
- qdisc_hash_add(qdisc);
+ qdisc_hash_add(qdisc, false);
#endif
}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 922683418e53..0a4cf27ea54b 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -21,14 +21,13 @@
struct mqprio_sched {
struct Qdisc **qdiscs;
- int hw_owned;
+ int hw_offload;
};
static void mqprio_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mqprio_sched *priv = qdisc_priv(sch);
- struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO};
unsigned int ntx;
if (priv->qdiscs) {
@@ -39,10 +38,15 @@ static void mqprio_destroy(struct Qdisc *sch)
kfree(priv->qdiscs);
}
- if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
+ if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
+ struct tc_mqprio_qopt offload = { 0 };
+ struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
+ { .mqprio = &offload } };
+
dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
- else
+ } else {
netdev_set_num_tc(dev, 0);
+ }
}
static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
@@ -59,15 +63,20 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
return -EINVAL;
}
- /* net_device does not support requested operation */
- if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
- return -EINVAL;
+ /* Limit qopt->hw to maximum supported offload value. Drivers have
+ * the option of overriding this later if they don't support the a
+ * given offload type.
+ */
+ if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX)
+ qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX;
- /* if hw owned qcount and qoffset are taken from LLD so
- * no reason to verify them here
+ /* If hardware offload is requested we will leave it to the device
+ * to either populate the queue counts itself or to validate the
+ * provided queue counts. If ndo_setup_tc is not present then
+ * hardware doesn't support offload and we should return an error.
*/
if (qopt->hw)
- return 0;
+ return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL;
for (i = 0; i < qopt->num_tc; i++) {
unsigned int last = qopt->offset[i] + qopt->count[i];
@@ -139,13 +148,15 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
* supplied and verified mapping
*/
if (qopt->hw) {
- struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO,
- { .tc = qopt->num_tc }};
+ struct tc_mqprio_qopt offload = *qopt;
+ struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
+ { .mqprio = &offload } };
- priv->hw_owned = 1;
err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
if (err)
return err;
+
+ priv->hw_offload = offload.hw;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -175,7 +186,7 @@ static void mqprio_attach(struct Qdisc *sch)
if (old)
qdisc_destroy(old);
if (ntx < dev->real_num_tx_queues)
- qdisc_hash_add(qdisc);
+ qdisc_hash_add(qdisc, false);
}
kfree(priv->qdiscs);
priv->qdiscs = NULL;
@@ -243,7 +254,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.num_tc = netdev_get_num_tc(dev);
memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
- opt.hw = priv->hw_owned;
+ opt.hw = priv->hw_offload;
for (i = 0; i < netdev_get_num_tc(dev); i++) {
opt.count[i] = dev->tc_to_txq[i].count;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index e7839a0d0eaa..43a3a10b3c81 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -217,6 +217,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
sch_tree_lock(sch);
old = q->queues[i];
q->queues[i] = child;
+ if (child != &noop_qdisc)
+ qdisc_hash_add(child, true);
if (old != &noop_qdisc) {
qdisc_tree_reduce_backlog(old,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index c8bb62a1e744..f0ce4780f395 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -462,7 +462,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* If a delay is expected, orphan the skb. (orphaning usually takes
* place at TX completion time, so _before_ the link transit delay)
*/
- if (q->latency || q->jitter)
+ if (q->latency || q->jitter || q->rate)
skb_orphan_partial(skb);
/*
@@ -530,21 +530,31 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
now = psched_get_time();
if (q->rate) {
- struct sk_buff *last;
+ struct netem_skb_cb *last = NULL;
+
+ if (sch->q.tail)
+ last = netem_skb_cb(sch->q.tail);
+ if (q->t_root.rb_node) {
+ struct sk_buff *t_skb;
+ struct netem_skb_cb *t_last;
+
+ t_skb = netem_rb_to_skb(rb_last(&q->t_root));
+ t_last = netem_skb_cb(t_skb);
+ if (!last ||
+ t_last->time_to_send > last->time_to_send) {
+ last = t_last;
+ }
+ }
- if (sch->q.qlen)
- last = sch->q.tail;
- else
- last = netem_rb_to_skb(rb_last(&q->t_root));
if (last) {
/*
* Last packet in queue is reference point (now),
* calculate this time bonus and subtract
* from delay.
*/
- delay -= netem_skb_cb(last)->time_to_send - now;
+ delay -= last->time_to_send - now;
delay = max_t(psched_tdiff_t, 0, delay);
- now = netem_skb_cb(last)->time_to_send;
+ now = last->time_to_send;
}
delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
@@ -833,7 +843,7 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
if (nested_len >= nla_attr_size(0))
return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
- nested_len, policy);
+ nested_len, policy, NULL);
memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
return 0;
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 5c3a99d6aa82..6c2791d6102d 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -190,7 +190,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy);
+ err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index d4d7db267b6e..92c2e6d448d7 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -192,8 +192,11 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
qdisc_destroy(child);
}
- for (i = oldbands; i < q->bands; i++)
+ for (i = oldbands; i < q->bands; i++) {
q->queues[i] = queues[i];
+ if (q->queues[i] != &noop_qdisc)
+ qdisc_hash_add(q->queues[i], true);
+ }
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index f9e712ce2d15..041eba3006cc 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -418,7 +418,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return -EINVAL;
}
- err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy);
+ err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy,
+ NULL);
if (err < 0)
return err;
@@ -494,6 +495,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
goto destroy_class;
}
+ if (cl->qdisc != &noop_qdisc)
+ qdisc_hash_add(cl->qdisc, true);
sch_tree_lock(sch);
qdisc_class_hash_insert(&q->clhash, &cl->common);
sch_tree_unlock(sch);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 249b2a18acbd..11292adce412 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -173,7 +173,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
+ err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
if (err < 0)
return err;
@@ -191,6 +191,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
return PTR_ERR(child);
}
+ if (child != &noop_qdisc)
+ qdisc_hash_add(child, true);
sch_tree_lock(sch);
q->flags = ctl->flags;
q->limit = ctl->limit;
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index fe6963d21519..0f777273ba29 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -495,7 +495,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
int err;
if (opt) {
- err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy);
+ err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy, NULL);
if (err < 0)
return -EINVAL;
@@ -513,6 +513,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
if (IS_ERR(child))
return PTR_ERR(child);
+ if (child != &noop_qdisc)
+ qdisc_hash_add(child, true);
sch_tree_lock(sch);
qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 42e8c8615e65..b00e02c139de 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -714,9 +714,8 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
struct sfq_sched_data *q = qdisc_priv(sch);
int i;
- q->perturb_timer.function = sfq_perturbation;
- q->perturb_timer.data = (unsigned long)sch;
- init_timer_deferrable(&q->perturb_timer);
+ setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
+ (unsigned long)sch);
for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
q->dep[i].next = i + SFQ_MAX_FLOWS;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 303355c449ab..b2e4b6ad241a 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -315,7 +315,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
s64 buffer, mtu;
u64 rate64 = 0, prate64 = 0;
- err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy);
+ err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy, NULL);
if (err < 0)
return err;
@@ -396,6 +396,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
+ if (child != &noop_qdisc)
+ qdisc_hash_add(child, true);
}
q->limit = qopt->limit;
if (tb[TCA_TBF_PBURST])
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index e3621cb4827f..697721a7a3f1 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -306,14 +306,24 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
time_after(jiffies, chunk->msg->expires_at)) {
- if (chunk->sent_count)
+ struct sctp_stream_out *streamout =
+ &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream];
+
+ if (chunk->sent_count) {
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
- else
+ streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
+ } else {
chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+ streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+ }
return 1;
} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
+ struct sctp_stream_out *streamout =
+ &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream];
+
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+ streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
} else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
chunk->msg->expires_at &&
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 8081476ed313..fe4c3d462f6e 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -353,6 +353,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
struct sctp_chunk *chk, *temp;
list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
+ struct sctp_stream_out *streamout;
+
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
@@ -361,8 +363,10 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
sctp_insert_list(&asoc->outqueue.abandoned,
&chk->transmitted_list);
+ streamout = &asoc->stream->out[chk->sinfo.sinfo_stream];
asoc->sent_cnt_removable--;
asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+ streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
if (!chk->tsn_gap_acked) {
if (chk->transport)
@@ -396,6 +400,12 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
q->out_qlen -= chk->skb->len;
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+ if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) {
+ struct sctp_stream_out *streamout =
+ &asoc->stream->out[chk->sinfo.sinfo_stream];
+
+ streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+ }
msg_len -= SCTP_DATA_SNDSIZE(chk) +
sizeof(struct sk_buff) +
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 24c6ccce7539..4f5e6cfc7f60 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3872,9 +3872,18 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net,
else if (param.p->type == SCTP_PARAM_RESET_IN_REQUEST)
reply = sctp_process_strreset_inreq(
(struct sctp_association *)asoc, param, &ev);
- /* More handles for other types will be added here, by now it
- * just ignores other types.
- */
+ else if (param.p->type == SCTP_PARAM_RESET_TSN_REQUEST)
+ reply = sctp_process_strreset_tsnreq(
+ (struct sctp_association *)asoc, param, &ev);
+ else if (param.p->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS)
+ reply = sctp_process_strreset_addstrm_out(
+ (struct sctp_association *)asoc, param, &ev);
+ else if (param.p->type == SCTP_PARAM_RESET_ADD_IN_STREAMS)
+ reply = sctp_process_strreset_addstrm_in(
+ (struct sctp_association *)asoc, param, &ev);
+ else if (param.p->type == SCTP_PARAM_RESET_RESPONSE)
+ reply = sctp_process_strreset_resp(
+ (struct sctp_association *)asoc, param, &ev);
if (ev)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d9d4c92e06b3..f16c8d97b7f3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3758,6 +3758,39 @@ out:
return retval;
}
+static int sctp_setsockopt_reconfig_supported(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(params))
+ goto out;
+
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ asoc->reconf_enable = !!params.assoc_value;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ sp->ep->reconf_enable = !!params.assoc_value;
+ } else {
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_setsockopt_enable_strreset(struct sock *sk,
char __user *optval,
unsigned int optlen)
@@ -4038,6 +4071,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_DEFAULT_PRINFO:
retval = sctp_setsockopt_default_prinfo(sk, optval, optlen);
break;
+ case SCTP_RECONFIG_SUPPORTED:
+ retval = sctp_setsockopt_reconfig_supported(sk, optval, optlen);
+ break;
case SCTP_ENABLE_STREAM_RESET:
retval = sctp_setsockopt_enable_strreset(sk, optval, optlen);
break;
@@ -6540,6 +6576,102 @@ out:
return retval;
}
+static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_stream_out *streamout;
+ struct sctp_association *asoc;
+ struct sctp_prstatus params;
+ int retval = -EINVAL;
+ int policy;
+
+ if (len < sizeof(params))
+ goto out;
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ policy = params.sprstat_policy;
+ if (policy & ~SCTP_PR_SCTP_MASK)
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
+ if (!asoc || params.sprstat_sid >= asoc->stream->outcnt)
+ goto out;
+
+ streamout = &asoc->stream->out[params.sprstat_sid];
+ if (policy == SCTP_PR_SCTP_NONE) {
+ params.sprstat_abandoned_unsent = 0;
+ params.sprstat_abandoned_sent = 0;
+ for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
+ params.sprstat_abandoned_unsent +=
+ streamout->abandoned_unsent[policy];
+ params.sprstat_abandoned_sent +=
+ streamout->abandoned_sent[policy];
+ }
+ } else {
+ params.sprstat_abandoned_unsent =
+ streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+ params.sprstat_abandoned_sent =
+ streamout->abandoned_sent[__SCTP_PR_INDEX(policy)];
+ }
+
+ if (put_user(len, optlen) || copy_to_user(optval, &params, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ params.assoc_value = asoc->reconf_enable;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ params.assoc_value = sp->ep->reconf_enable;
+ } else {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt_enable_strreset(struct sock *sk, int len,
char __user *optval,
int __user *optlen)
@@ -6748,6 +6880,14 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_pr_assocstatus(sk, len, optval,
optlen);
break;
+ case SCTP_PR_STREAM_STATUS:
+ retval = sctp_getsockopt_pr_streamstatus(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_RECONFIG_SUPPORTED:
+ retval = sctp_getsockopt_reconfig_supported(sk, len, optval,
+ optlen);
+ break;
case SCTP_ENABLE_STREAM_RESET:
retval = sctp_getsockopt_enable_strreset(sk, len, optval,
optlen);
@@ -7440,9 +7580,12 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
- if (sk_can_busy_loop(sk) &&
- sk_busy_loop(sk, noblock))
- continue;
+ if (sk_can_busy_loop(sk)) {
+ sk_busy_loop(sk, noblock);
+
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ continue;
+ }
/* User doesn't want to wait. */
error = -EAGAIN;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index bbed997e1c5f..dda53a293986 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -294,18 +294,6 @@ int sctp_send_add_streams(struct sctp_association *asoc,
stream->out = streamout;
}
- if (in) {
- struct sctp_stream_in *streamin;
-
- streamin = krealloc(stream->in, incnt * sizeof(*streamin),
- GFP_KERNEL);
- if (!streamin)
- goto out;
-
- memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
- stream->in = streamin;
- }
-
chunk = sctp_make_strreset_addstrm(asoc, out, in);
if (!chunk)
goto out;
@@ -330,13 +318,14 @@ out:
}
static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
- struct sctp_association *asoc, __u32 resp_seq)
+ struct sctp_association *asoc, __u32 resp_seq,
+ __be16 type)
{
struct sctp_chunk *chunk = asoc->strreset_chunk;
struct sctp_reconf_chunk *hdr;
union sctp_params param;
- if (ntohl(resp_seq) != asoc->strreset_outseq || !chunk)
+ if (!chunk)
return NULL;
hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr;
@@ -347,13 +336,21 @@ static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
*/
struct sctp_strreset_tsnreq *req = param.v;
- if (req->request_seq == resp_seq)
+ if ((!resp_seq || req->request_seq == resp_seq) &&
+ (!type || type == req->param_hdr.type))
return param.v;
}
return NULL;
}
+static void sctp_update_strreset_result(struct sctp_association *asoc,
+ __u32 result)
+{
+ asoc->strreset_result[1] = asoc->strreset_result[0];
+ asoc->strreset_result[0] = result;
+}
+
struct sctp_chunk *sctp_process_strreset_outreq(
struct sctp_association *asoc,
union sctp_params param,
@@ -370,15 +367,19 @@ struct sctp_chunk *sctp_process_strreset_outreq(
if (ntohl(outreq->send_reset_at_tsn) >
sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map)) {
result = SCTP_STRRESET_IN_PROGRESS;
- goto out;
+ goto err;
}
- if (request_seq > asoc->strreset_inseq) {
+ if (TSN_lt(asoc->strreset_inseq, request_seq) ||
+ TSN_lt(request_seq, asoc->strreset_inseq - 2)) {
result = SCTP_STRRESET_ERR_BAD_SEQNO;
- goto out;
- } else if (request_seq == asoc->strreset_inseq) {
- asoc->strreset_inseq++;
+ goto err;
+ } else if (TSN_lt(request_seq, asoc->strreset_inseq)) {
+ i = asoc->strreset_inseq - request_seq - 1;
+ result = asoc->strreset_result[i];
+ goto err;
}
+ asoc->strreset_inseq++;
/* Check strreset_enable after inseq inc, as sender cannot tell
* the peer doesn't enable strreset after receiving response with
@@ -388,13 +389,9 @@ struct sctp_chunk *sctp_process_strreset_outreq(
goto out;
if (asoc->strreset_chunk) {
- sctp_paramhdr_t *param_hdr;
- struct sctp_transport *t;
-
- param_hdr = sctp_chunk_lookup_strreset_param(
- asoc, outreq->response_seq);
- if (!param_hdr || param_hdr->type !=
- SCTP_PARAM_RESET_IN_REQUEST) {
+ if (!sctp_chunk_lookup_strreset_param(
+ asoc, outreq->response_seq,
+ SCTP_PARAM_RESET_IN_REQUEST)) {
/* same process with outstanding isn't 0 */
result = SCTP_STRRESET_ERR_IN_PROGRESS;
goto out;
@@ -404,6 +401,8 @@ struct sctp_chunk *sctp_process_strreset_outreq(
asoc->strreset_outseq++;
if (!asoc->strreset_outstanding) {
+ struct sctp_transport *t;
+
t = asoc->strreset_chunk->transport;
if (del_timer(&t->reconf_timer))
sctp_transport_put(t);
@@ -439,6 +438,8 @@ struct sctp_chunk *sctp_process_strreset_outreq(
GFP_ATOMIC);
out:
+ sctp_update_strreset_result(asoc, result);
+err:
return sctp_make_strreset_resp(asoc, result, request_seq);
}
@@ -455,12 +456,18 @@ struct sctp_chunk *sctp_process_strreset_inreq(
__u32 request_seq;
request_seq = ntohl(inreq->request_seq);
- if (request_seq > asoc->strreset_inseq) {
+ if (TSN_lt(asoc->strreset_inseq, request_seq) ||
+ TSN_lt(request_seq, asoc->strreset_inseq - 2)) {
result = SCTP_STRRESET_ERR_BAD_SEQNO;
- goto out;
- } else if (request_seq == asoc->strreset_inseq) {
- asoc->strreset_inseq++;
+ goto err;
+ } else if (TSN_lt(request_seq, asoc->strreset_inseq)) {
+ i = asoc->strreset_inseq - request_seq - 1;
+ result = asoc->strreset_result[i];
+ if (result == SCTP_STRRESET_PERFORMED)
+ return NULL;
+ goto err;
}
+ asoc->strreset_inseq++;
if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ))
goto out;
@@ -495,12 +502,407 @@ struct sctp_chunk *sctp_process_strreset_inreq(
asoc->strreset_outstanding = 1;
sctp_chunk_hold(asoc->strreset_chunk);
+ result = SCTP_STRRESET_PERFORMED;
+
*evp = sctp_ulpevent_make_stream_reset_event(asoc,
SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC);
out:
+ sctp_update_strreset_result(asoc, result);
+err:
if (!chunk)
chunk = sctp_make_strreset_resp(asoc, result, request_seq);
return chunk;
}
+
+struct sctp_chunk *sctp_process_strreset_tsnreq(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ __u32 init_tsn = 0, next_tsn = 0, max_tsn_seen;
+ struct sctp_strreset_tsnreq *tsnreq = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+ __u32 request_seq;
+ __u16 i;
+
+ request_seq = ntohl(tsnreq->request_seq);
+ if (TSN_lt(asoc->strreset_inseq, request_seq) ||
+ TSN_lt(request_seq, asoc->strreset_inseq - 2)) {
+ result = SCTP_STRRESET_ERR_BAD_SEQNO;
+ goto err;
+ } else if (TSN_lt(request_seq, asoc->strreset_inseq)) {
+ i = asoc->strreset_inseq - request_seq - 1;
+ result = asoc->strreset_result[i];
+ if (result == SCTP_STRRESET_PERFORMED) {
+ next_tsn = asoc->next_tsn;
+ init_tsn =
+ sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1;
+ }
+ goto err;
+ }
+ asoc->strreset_inseq++;
+
+ if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ))
+ goto out;
+
+ if (asoc->strreset_outstanding) {
+ result = SCTP_STRRESET_ERR_IN_PROGRESS;
+ goto out;
+ }
+
+ /* G3: The same processing as though a SACK chunk with no gap report
+ * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were
+ * received MUST be performed.
+ */
+ max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map);
+ sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen);
+ sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+
+ /* G1: Compute an appropriate value for the Receiver's Next TSN -- the
+ * TSN that the peer should use to send the next DATA chunk. The
+ * value SHOULD be the smallest TSN not acknowledged by the
+ * receiver of the request plus 2^31.
+ */
+ init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31);
+ sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
+ init_tsn, GFP_ATOMIC);
+
+ /* G4: The same processing as though a FWD-TSN chunk (as defined in
+ * [RFC3758]) with all streams affected and a new cumulative TSN
+ * ACK of the Receiver's Next TSN minus 1 were received MUST be
+ * performed.
+ */
+ sctp_outq_free(&asoc->outqueue);
+
+ /* G2: Compute an appropriate value for the local endpoint's next TSN,
+ * i.e., the next TSN assigned by the receiver of the SSN/TSN reset
+ * chunk. The value SHOULD be the highest TSN sent by the receiver
+ * of the request plus 1.
+ */
+ next_tsn = asoc->next_tsn;
+ asoc->ctsn_ack_point = next_tsn - 1;
+ asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
+
+ /* G5: The next expected and outgoing SSNs MUST be reset to 0 for all
+ * incoming and outgoing streams.
+ */
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].ssn = 0;
+ for (i = 0; i < stream->incnt; i++)
+ stream->in[i].ssn = 0;
+
+ result = SCTP_STRRESET_PERFORMED;
+
+ *evp = sctp_ulpevent_make_assoc_reset_event(asoc, 0, init_tsn,
+ next_tsn, GFP_ATOMIC);
+
+out:
+ sctp_update_strreset_result(asoc, result);
+err:
+ return sctp_make_strreset_tsnresp(asoc, result, request_seq,
+ next_tsn, init_tsn);
+}
+
+struct sctp_chunk *sctp_process_strreset_addstrm_out(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ struct sctp_strreset_addstrm *addstrm = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+ struct sctp_stream_in *streamin;
+ __u32 request_seq, incnt;
+ __u16 in, i;
+
+ request_seq = ntohl(addstrm->request_seq);
+ if (TSN_lt(asoc->strreset_inseq, request_seq) ||
+ TSN_lt(request_seq, asoc->strreset_inseq - 2)) {
+ result = SCTP_STRRESET_ERR_BAD_SEQNO;
+ goto err;
+ } else if (TSN_lt(request_seq, asoc->strreset_inseq)) {
+ i = asoc->strreset_inseq - request_seq - 1;
+ result = asoc->strreset_result[i];
+ goto err;
+ }
+ asoc->strreset_inseq++;
+
+ if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
+ goto out;
+
+ if (asoc->strreset_chunk) {
+ if (!sctp_chunk_lookup_strreset_param(
+ asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) {
+ /* same process with outstanding isn't 0 */
+ result = SCTP_STRRESET_ERR_IN_PROGRESS;
+ goto out;
+ }
+
+ asoc->strreset_outstanding--;
+ asoc->strreset_outseq++;
+
+ if (!asoc->strreset_outstanding) {
+ struct sctp_transport *t;
+
+ t = asoc->strreset_chunk->transport;
+ if (del_timer(&t->reconf_timer))
+ sctp_transport_put(t);
+
+ sctp_chunk_put(asoc->strreset_chunk);
+ asoc->strreset_chunk = NULL;
+ }
+ }
+
+ in = ntohs(addstrm->number_of_streams);
+ incnt = stream->incnt + in;
+ if (!in || incnt > SCTP_MAX_STREAM)
+ goto out;
+
+ streamin = krealloc(stream->in, incnt * sizeof(*streamin),
+ GFP_ATOMIC);
+ if (!streamin)
+ goto out;
+
+ memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
+ stream->in = streamin;
+ stream->incnt = incnt;
+
+ result = SCTP_STRRESET_PERFORMED;
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc,
+ 0, ntohs(addstrm->number_of_streams), 0, GFP_ATOMIC);
+
+out:
+ sctp_update_strreset_result(asoc, result);
+err:
+ return sctp_make_strreset_resp(asoc, result, request_seq);
+}
+
+struct sctp_chunk *sctp_process_strreset_addstrm_in(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ struct sctp_strreset_addstrm *addstrm = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+ struct sctp_stream_out *streamout;
+ struct sctp_chunk *chunk = NULL;
+ __u32 request_seq, outcnt;
+ __u16 out, i;
+
+ request_seq = ntohl(addstrm->request_seq);
+ if (TSN_lt(asoc->strreset_inseq, request_seq) ||
+ TSN_lt(request_seq, asoc->strreset_inseq - 2)) {
+ result = SCTP_STRRESET_ERR_BAD_SEQNO;
+ goto err;
+ } else if (TSN_lt(request_seq, asoc->strreset_inseq)) {
+ i = asoc->strreset_inseq - request_seq - 1;
+ result = asoc->strreset_result[i];
+ if (result == SCTP_STRRESET_PERFORMED)
+ return NULL;
+ goto err;
+ }
+ asoc->strreset_inseq++;
+
+ if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
+ goto out;
+
+ if (asoc->strreset_outstanding) {
+ result = SCTP_STRRESET_ERR_IN_PROGRESS;
+ goto out;
+ }
+
+ out = ntohs(addstrm->number_of_streams);
+ outcnt = stream->outcnt + out;
+ if (!out || outcnt > SCTP_MAX_STREAM)
+ goto out;
+
+ streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
+ GFP_ATOMIC);
+ if (!streamout)
+ goto out;
+
+ memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
+ stream->out = streamout;
+
+ chunk = sctp_make_strreset_addstrm(asoc, out, 0);
+ if (!chunk)
+ goto out;
+
+ asoc->strreset_chunk = chunk;
+ asoc->strreset_outstanding = 1;
+ sctp_chunk_hold(asoc->strreset_chunk);
+
+ stream->outcnt = outcnt;
+
+ result = SCTP_STRRESET_PERFORMED;
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc,
+ 0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC);
+
+out:
+ sctp_update_strreset_result(asoc, result);
+err:
+ if (!chunk)
+ chunk = sctp_make_strreset_resp(asoc, result, request_seq);
+
+ return chunk;
+}
+
+struct sctp_chunk *sctp_process_strreset_resp(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ struct sctp_strreset_resp *resp = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ struct sctp_transport *t;
+ __u16 i, nums, flags = 0;
+ sctp_paramhdr_t *req;
+ __u32 result;
+
+ req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0);
+ if (!req)
+ return NULL;
+
+ result = ntohl(resp->result);
+ if (result != SCTP_STRRESET_PERFORMED) {
+ /* if in progress, do nothing but retransmit */
+ if (result == SCTP_STRRESET_IN_PROGRESS)
+ return NULL;
+ else if (result == SCTP_STRRESET_DENIED)
+ flags = SCTP_STREAM_RESET_DENIED;
+ else
+ flags = SCTP_STREAM_RESET_FAILED;
+ }
+
+ if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) {
+ struct sctp_strreset_outreq *outreq;
+ __u16 *str_p;
+
+ outreq = (struct sctp_strreset_outreq *)req;
+ str_p = outreq->list_of_streams;
+ nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2;
+
+ if (result == SCTP_STRRESET_PERFORMED) {
+ if (nums) {
+ for (i = 0; i < nums; i++)
+ stream->out[ntohs(str_p[i])].ssn = 0;
+ } else {
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].ssn = 0;
+ }
+
+ flags = SCTP_STREAM_RESET_OUTGOING_SSN;
+ }
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+
+ *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
+ nums, str_p, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) {
+ struct sctp_strreset_inreq *inreq;
+ __u16 *str_p;
+
+ /* if the result is performed, it's impossible for inreq */
+ if (result == SCTP_STRRESET_PERFORMED)
+ return NULL;
+
+ inreq = (struct sctp_strreset_inreq *)req;
+ str_p = inreq->list_of_streams;
+ nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2;
+
+ *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
+ nums, str_p, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) {
+ struct sctp_strreset_resptsn *resptsn;
+ __u32 stsn, rtsn;
+
+ /* check for resptsn, as sctp_verify_reconf didn't do it*/
+ if (ntohs(param.p->length) != sizeof(*resptsn))
+ return NULL;
+
+ resptsn = (struct sctp_strreset_resptsn *)resp;
+ stsn = ntohl(resptsn->senders_next_tsn);
+ rtsn = ntohl(resptsn->receivers_next_tsn);
+
+ if (result == SCTP_STRRESET_PERFORMED) {
+ __u32 mtsn = sctp_tsnmap_get_max_tsn_seen(
+ &asoc->peer.tsn_map);
+
+ sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn);
+ sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+
+ sctp_tsnmap_init(&asoc->peer.tsn_map,
+ SCTP_TSN_MAP_INITIAL,
+ stsn, GFP_ATOMIC);
+
+ sctp_outq_free(&asoc->outqueue);
+
+ asoc->next_tsn = rtsn;
+ asoc->ctsn_ack_point = asoc->next_tsn - 1;
+ asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].ssn = 0;
+ for (i = 0; i < stream->incnt; i++)
+ stream->in[i].ssn = 0;
+ }
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+
+ *evp = sctp_ulpevent_make_assoc_reset_event(asoc, flags,
+ stsn, rtsn, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS) {
+ struct sctp_strreset_addstrm *addstrm;
+ __u16 number;
+
+ addstrm = (struct sctp_strreset_addstrm *)req;
+ nums = ntohs(addstrm->number_of_streams);
+ number = stream->outcnt - nums;
+
+ if (result == SCTP_STRRESET_PERFORMED)
+ for (i = number; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+ else
+ stream->outcnt = number;
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc, flags,
+ 0, nums, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_ADD_IN_STREAMS) {
+ struct sctp_strreset_addstrm *addstrm;
+
+ /* if the result is performed, it's impossible for addstrm in
+ * request.
+ */
+ if (result == SCTP_STRRESET_PERFORMED)
+ return NULL;
+
+ addstrm = (struct sctp_strreset_addstrm *)req;
+ nums = ntohs(addstrm->number_of_streams);
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc, flags,
+ nums, 0, GFP_ATOMIC);
+ }
+
+ asoc->strreset_outstanding--;
+ asoc->strreset_outseq++;
+
+ /* remove everything for this reconf request */
+ if (!asoc->strreset_outstanding) {
+ t = asoc->strreset_chunk->transport;
+ if (del_timer(&t->reconf_timer))
+ sctp_transport_put(t);
+
+ sctp_chunk_put(asoc->strreset_chunk);
+ asoc->strreset_chunk = NULL;
+ }
+
+ return NULL;
+}
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index daf8554fd42a..0e732f68c2bf 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -275,6 +275,13 @@ static struct ctl_table sctp_net_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "reconf_enable",
+ .data = &init_net.sctp.reconf_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "auth_enable",
.data = &init_net.sctp.auth_enable,
.maxlen = sizeof(int),
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index c8881bc542a0..ec2b3e013c2f 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -883,6 +883,62 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event(
return event;
}
+struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event(
+ const struct sctp_association *asoc, __u16 flags, __u32 local_tsn,
+ __u32 remote_tsn, gfp_t gfp)
+{
+ struct sctp_assoc_reset_event *areset;
+ struct sctp_ulpevent *event;
+ struct sk_buff *skb;
+
+ event = sctp_ulpevent_new(sizeof(struct sctp_assoc_reset_event),
+ MSG_NOTIFICATION, gfp);
+ if (!event)
+ return NULL;
+
+ skb = sctp_event2skb(event);
+ areset = (struct sctp_assoc_reset_event *)
+ skb_put(skb, sizeof(struct sctp_assoc_reset_event));
+
+ areset->assocreset_type = SCTP_ASSOC_RESET_EVENT;
+ areset->assocreset_flags = flags;
+ areset->assocreset_length = sizeof(struct sctp_assoc_reset_event);
+ sctp_ulpevent_set_owner(event, asoc);
+ areset->assocreset_assoc_id = sctp_assoc2id(asoc);
+ areset->assocreset_local_tsn = local_tsn;
+ areset->assocreset_remote_tsn = remote_tsn;
+
+ return event;
+}
+
+struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event(
+ const struct sctp_association *asoc, __u16 flags,
+ __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp)
+{
+ struct sctp_stream_change_event *schange;
+ struct sctp_ulpevent *event;
+ struct sk_buff *skb;
+
+ event = sctp_ulpevent_new(sizeof(struct sctp_stream_change_event),
+ MSG_NOTIFICATION, gfp);
+ if (!event)
+ return NULL;
+
+ skb = sctp_event2skb(event);
+ schange = (struct sctp_stream_change_event *)
+ skb_put(skb, sizeof(struct sctp_stream_change_event));
+
+ schange->strchange_type = SCTP_STREAM_CHANGE_EVENT;
+ schange->strchange_flags = flags;
+ schange->strchange_length = sizeof(struct sctp_stream_change_event);
+ sctp_ulpevent_set_owner(event, asoc);
+ schange->strchange_assoc_id = sctp_assoc2id(asoc);
+ schange->strchange_instrms = strchange_instrms;
+ schange->strchange_outstrms = strchange_outstrms;
+
+ return event;
+}
+
/* Return the notification type, assuming this is a notification
* event.
*/
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 093803786eac..5b6ee21368a6 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -147,7 +147,6 @@ static int smc_release(struct socket *sock)
schedule_delayed_work(&smc->sock_put_work,
SMC_CLOSE_SOCK_PUT_DELAY);
}
- sk->sk_prot->unhash(sk);
release_sock(sk);
sock_put(sk);
@@ -451,6 +450,9 @@ static int smc_connect_rdma(struct smc_sock *smc)
goto decline_rdma_unlock;
}
+ smc_close_init(smc);
+ smc_rx_init(smc);
+
if (local_contact == SMC_FIRST_CONTACT) {
rc = smc_ib_ready_link(link);
if (rc) {
@@ -477,7 +479,6 @@ static int smc_connect_rdma(struct smc_sock *smc)
mutex_unlock(&smc_create_lgr_pending);
smc_tx_init(smc);
- smc_rx_init(smc);
out_connected:
smc_copy_sock_settings_to_clc(smc);
@@ -637,7 +638,8 @@ struct sock *smc_accept_dequeue(struct sock *parent,
smc_accept_unlink(new_sk);
if (new_sk->sk_state == SMC_CLOSED) {
- /* tbd in follow-on patch: close this sock */
+ new_sk->sk_prot->unhash(new_sk);
+ sock_put(new_sk);
continue;
}
if (new_sock)
@@ -657,8 +659,13 @@ void smc_close_non_accepted(struct sock *sk)
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
- if (!smc->use_fallback)
+ if (smc->use_fallback) {
+ sk->sk_state = SMC_CLOSED;
+ } else {
smc_close_active(smc);
+ sock_set_flag(sk, SOCK_DEAD);
+ sk->sk_shutdown |= SHUTDOWN_MASK;
+ }
if (smc->clcsock) {
struct socket *tcp;
@@ -666,11 +673,9 @@ void smc_close_non_accepted(struct sock *sk)
smc->clcsock = NULL;
sock_release(tcp);
}
- sock_set_flag(sk, SOCK_DEAD);
- sk->sk_shutdown |= SHUTDOWN_MASK;
if (smc->use_fallback) {
schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else {
+ } else if (sk->sk_state == SMC_CLOSED) {
smc_conn_free(&smc->conn);
schedule_delayed_work(&smc->sock_put_work,
SMC_CLOSE_SOCK_PUT_DELAY);
@@ -800,6 +805,9 @@ static void smc_listen_work(struct work_struct *work)
goto decline_rdma;
}
+ smc_close_init(new_smc);
+ smc_rx_init(new_smc);
+
rc = smc_clc_send_accept(new_smc, local_contact);
if (rc)
goto out_err;
@@ -839,7 +847,6 @@ static void smc_listen_work(struct work_struct *work)
}
smc_tx_init(new_smc);
- smc_rx_init(new_smc);
out_connected:
sk_refcnt_debug_inc(newsmcsk);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index ee5fbea24549..6e44313e4467 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -164,6 +164,7 @@ struct smc_connection {
#ifndef KERNEL_HAS_ATOMIC64
spinlock_t acurs_lock; /* protect cursors */
#endif
+ struct work_struct close_work; /* peer sent some closing */
};
struct smc_sock { /* smc sock container */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 5a339493872e..a7294edbc221 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -217,8 +217,13 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smc->sk.sk_err = ECONNRESET;
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
}
- if (smc_cdc_rxed_any_close_or_senddone(conn))
- smc_close_passive_received(smc);
+ if (smc_cdc_rxed_any_close_or_senddone(conn)) {
+ smc->sk.sk_shutdown |= RCV_SHUTDOWN;
+ if (smc->clcsock && smc->clcsock->sk)
+ smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
+ sock_set_flag(&smc->sk, SOCK_DONE);
+ schedule_work(&conn->close_work);
+ }
/* piggy backed tx info */
/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
@@ -228,8 +233,6 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smc_close_wake_tx_prepared(smc);
}
- /* subsequent patch: trigger socket release if connection closed */
-
/* socket connected but not accepted */
if (!smc->sk.sk_socket)
return;
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 67a71d170bed..3c2e166b5d22 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -117,7 +117,6 @@ void smc_close_active_abort(struct smc_sock *smc)
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
- bh_lock_sock(&smc->sk);
smc->sk.sk_err = ECONNABORTED;
if (smc->clcsock && smc->clcsock->sk) {
smc->clcsock->sk->sk_err = ECONNABORTED;
@@ -125,6 +124,7 @@ void smc_close_active_abort(struct smc_sock *smc)
}
switch (smc->sk.sk_state) {
case SMC_INIT:
+ case SMC_ACTIVE:
smc->sk.sk_state = SMC_PEERABORTWAIT;
break;
case SMC_APPCLOSEWAIT1:
@@ -161,10 +161,15 @@ void smc_close_active_abort(struct smc_sock *smc)
}
sock_set_flag(&smc->sk, SOCK_DEAD);
- bh_unlock_sock(&smc->sk);
smc->sk.sk_state_change(&smc->sk);
}
+static inline bool smc_close_sent_any_close(struct smc_connection *conn)
+{
+ return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
+}
+
int smc_close_active(struct smc_sock *smc)
{
struct smc_cdc_conn_state_flags *txflags =
@@ -185,8 +190,7 @@ again:
case SMC_INIT:
sk->sk_state = SMC_CLOSED;
if (smc->smc_listen_work.func)
- flush_work(&smc->smc_listen_work);
- sock_put(sk);
+ cancel_work_sync(&smc->smc_listen_work);
break;
case SMC_LISTEN:
sk->sk_state = SMC_CLOSED;
@@ -198,7 +202,7 @@ again:
}
release_sock(sk);
smc_close_cleanup_listen(sk);
- flush_work(&smc->tcp_listen_work);
+ cancel_work_sync(&smc->smc_listen_work);
lock_sock(sk);
break;
case SMC_ACTIVE:
@@ -218,7 +222,7 @@ again:
case SMC_APPFINCLOSEWAIT:
/* socket already shutdown wr or both (active close) */
if (txflags->peer_done_writing &&
- !txflags->peer_conn_closed) {
+ !smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
}
@@ -248,6 +252,13 @@ again:
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
+ if (txflags->peer_done_writing &&
+ !smc_close_sent_any_close(conn)) {
+ /* just shutdown wr done, send close request */
+ rc = smc_close_final(conn);
+ }
+ /* peer sending PeerConnectionClosed will cause transition */
+ break;
case SMC_PEERFINCLOSEWAIT:
/* peer sending PeerConnectionClosed will cause transition */
break;
@@ -285,7 +296,7 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (txflags->peer_done_writing &&
- !txflags->peer_conn_closed) {
+ !smc_close_sent_any_close(&smc->conn)) {
/* just shutdown, but not yet closed locally */
smc_close_abort(&smc->conn);
sk->sk_state = SMC_PROCESSABORT;
@@ -306,22 +317,27 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
* or peer_done_writing.
- * Called under tasklet context.
*/
-void smc_close_passive_received(struct smc_sock *smc)
+static void smc_close_passive_work(struct work_struct *work)
{
- struct smc_cdc_conn_state_flags *rxflags =
- &smc->conn.local_rx_ctrl.conn_state_flags;
+ struct smc_connection *conn = container_of(work,
+ struct smc_connection,
+ close_work);
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ struct smc_cdc_conn_state_flags *rxflags;
struct sock *sk = &smc->sk;
int old_state;
- sk->sk_shutdown |= RCV_SHUTDOWN;
- if (smc->clcsock && smc->clcsock->sk)
- smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
- sock_set_flag(&smc->sk, SOCK_DONE);
-
+ lock_sock(&smc->sk);
old_state = sk->sk_state;
+ if (!conn->alert_token_local) {
+ /* abnormal termination */
+ smc_close_active_abort(smc);
+ goto wakeup;
+ }
+
+ rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
if (rxflags->peer_conn_abort) {
smc_close_passive_abort_received(smc);
goto wakeup;
@@ -331,7 +347,7 @@ void smc_close_passive_received(struct smc_sock *smc)
case SMC_INIT:
if (atomic_read(&smc->conn.bytes_to_rcv) ||
(rxflags->peer_done_writing &&
- !rxflags->peer_conn_closed))
+ !smc_cdc_rxed_any_close(conn)))
sk->sk_state = SMC_APPCLOSEWAIT1;
else
sk->sk_state = SMC_CLOSED;
@@ -348,7 +364,7 @@ void smc_close_passive_received(struct smc_sock *smc)
if (!smc_cdc_rxed_any_close(&smc->conn))
break;
if (sock_flag(sk, SOCK_DEAD) &&
- (sk->sk_shutdown == SHUTDOWN_MASK)) {
+ smc_close_sent_any_close(conn)) {
/* smc_release has already been called locally */
sk->sk_state = SMC_CLOSED;
} else {
@@ -367,17 +383,19 @@ void smc_close_passive_received(struct smc_sock *smc)
}
wakeup:
- if (old_state != sk->sk_state)
- sk->sk_state_change(sk);
sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */
- if ((sk->sk_state == SMC_CLOSED) &&
- (sock_flag(sk, SOCK_DEAD) || (old_state == SMC_INIT))) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
+ if (old_state != sk->sk_state) {
+ sk->sk_state_change(sk);
+ if ((sk->sk_state == SMC_CLOSED) &&
+ (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
+ smc_conn_free(&smc->conn);
+ schedule_delayed_work(&smc->sock_put_work,
+ SMC_CLOSE_SOCK_PUT_DELAY);
+ }
}
+ release_sock(&smc->sk);
}
void smc_close_sock_put_work(struct work_struct *work)
@@ -442,3 +460,9 @@ again:
sk->sk_state_change(&smc->sk);
return rc;
}
+
+/* Initialize close properties on connection establishment. */
+void smc_close_init(struct smc_sock *smc)
+{
+ INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
+}
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index bc9a2df3633c..4a3d99a8d7cb 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -21,8 +21,8 @@
void smc_close_wake_tx_prepared(struct smc_sock *smc);
void smc_close_active_abort(struct smc_sock *smc);
int smc_close_active(struct smc_sock *smc);
-void smc_close_passive_received(struct smc_sock *smc);
void smc_close_sock_put_work(struct work_struct *work);
int smc_close_shutdown_write(struct smc_sock *smc);
+void smc_close_init(struct smc_sock *smc);
#endif /* SMC_CLOSE_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 0eac633fb354..65020e93ff21 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -316,7 +316,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
smc = container_of(conn, struct smc_sock, conn);
sock_hold(&smc->sk);
__smc_lgr_unregister_conn(conn);
- smc_close_active_abort(smc);
+ schedule_work(&conn->close_work);
sock_put(&smc->sk);
node = rb_first(&lgr->conns_all);
}
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index e6743c008ac5..16b7c801f8b6 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -179,8 +179,6 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
u8 port_idx;
smcibdev = container_of(handler, struct smc_ib_device, event_handler);
- if (!smc_pnet_find_ib(smcibdev->ibdev->name))
- return;
switch (ibevent->event) {
case IB_EVENT_PORT_ERR:
@@ -259,7 +257,6 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
.max_recv_wr = SMC_WR_BUF_CNT * 3,
.max_send_sge = SMC_IB_MAX_SEND_SGE,
.max_recv_sge = 1,
- .max_inline_data = SMC_WR_TX_SIZE,
},
.sq_sig_type = IB_SIGNAL_REQ_WR,
.qp_type = IB_QPT_RC,
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index a95f74bb5569..7e1f0e24d177 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -11,6 +11,7 @@
#ifndef _SMC_IB_H
#define _SMC_IB_H
+#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <rdma/ib_verbs.h>
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 9d3e7fb8348d..78f7af28ae4f 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -219,7 +219,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
}
/* Find an infiniband device by a given name. The device might not exist. */
-struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
+static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
{
struct smc_ib_device *ibdev;
@@ -523,8 +523,11 @@ void smc_pnet_find_roce_resource(struct sock *sk,
read_lock(&smc_pnettable.lock);
list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
if (dst->dev == pnetelem->ndev) {
- *smcibdev = pnetelem->smcibdev;
- *ibport = pnetelem->ib_port;
+ if (smc_ib_port_active(pnetelem->smcibdev,
+ pnetelem->ib_port)) {
+ *smcibdev = pnetelem->smcibdev;
+ *ibport = pnetelem->ib_port;
+ }
break;
}
}
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 32ab3df928ca..c4f1bccd4358 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -16,7 +16,6 @@ struct smc_ib_device;
int smc_pnet_init(void) __init;
void smc_pnet_exit(void);
int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);
-struct smc_ib_device *smc_pnet_find_ib(char *ib_name);
void smc_pnet_find_roce_resource(struct sock *sk,
struct smc_ib_device **smcibdev, u8 *ibport);
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index c4ef9a4ec569..f0c8b089f770 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -36,11 +36,10 @@ static void smc_rx_data_ready(struct sock *sk)
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
POLLRDNORM | POLLRDBAND);
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
- else
- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 69a0013dd25c..21ec1832ab51 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -431,9 +431,13 @@ static void smc_tx_work(struct work_struct *work)
struct smc_connection,
tx_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ int rc;
lock_sock(&smc->sk);
- smc_tx_sndbuf_nonempty(conn);
+ rc = smc_tx_sndbuf_nonempty(conn);
+ if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
+ !atomic_read(&conn->bytes_to_rcv))
+ conn->local_rx_ctrl.prod_flags.write_blocked = 0;
release_sock(&smc->sk);
}
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index eadf157418dc..874ee9f9d796 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -447,7 +447,7 @@ static void smc_wr_init_sge(struct smc_link *lnk)
lnk->wr_tx_ibs[i].num_sge = 1;
lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
lnk->wr_tx_ibs[i].send_flags =
- IB_SEND_SIGNALED | IB_SEND_SOLICITED | IB_SEND_INLINE;
+ IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}
for (i = 0; i < lnk->wr_rx_cnt; i++) {
lnk->wr_rx_sges[i].addr =
diff --git a/net/socket.c b/net/socket.c
index 985ef06792d6..c2564eb25c6b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3356,3 +3356,49 @@ int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
return sock->ops->shutdown(sock, how);
}
EXPORT_SYMBOL(kernel_sock_shutdown);
+
+/* This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket. Assumes that the caller has a lock on the socket.
+ */
+u32 kernel_sock_ip_overhead(struct sock *sk)
+{
+ struct inet_sock *inet;
+ struct ip_options_rcu *opt;
+ u32 overhead = 0;
+ bool owned_by_user;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6_pinfo *np;
+ struct ipv6_txoptions *optv6 = NULL;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+ if (!sk)
+ return overhead;
+
+ owned_by_user = sock_owned_by_user(sk);
+ switch (sk->sk_family) {
+ case AF_INET:
+ inet = inet_sk(sk);
+ overhead += sizeof(struct iphdr);
+ opt = rcu_dereference_protected(inet->inet_opt,
+ owned_by_user);
+ if (opt)
+ overhead += opt->opt.optlen;
+ return overhead;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ np = inet6_sk(sk);
+ overhead += sizeof(struct ipv6hdr);
+ if (np)
+ optv6 = rcu_dereference_protected(np->opt,
+ owned_by_user);
+ if (optv6)
+ overhead += (optv6->opt_flen + optv6->opt_nflen);
+ return overhead;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+ default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
+ return overhead;
+ }
+}
+EXPORT_SYMBOL(kernel_sock_ip_overhead);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 017801f9dbaa..8d40a7d31c99 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -826,7 +826,7 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
int err;
err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
- switchdev_port_bridge_policy);
+ switchdev_port_bridge_policy, NULL);
if (err)
return err;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 33a5bdfbef76..d174ee3254ee 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -802,7 +802,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
- tipc_nl_bearer_policy);
+ tipc_nl_bearer_policy, info->extack);
if (err)
return err;
@@ -851,7 +851,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
- tipc_nl_bearer_policy);
+ tipc_nl_bearer_policy, info->extack);
if (err)
return err;
@@ -891,7 +891,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
- tipc_nl_bearer_policy);
+ tipc_nl_bearer_policy, info->extack);
if (err)
return err;
@@ -939,7 +939,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
- tipc_nl_bearer_policy);
+ tipc_nl_bearer_policy, info->extack);
if (err)
return err;
@@ -982,7 +982,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
- tipc_nl_bearer_policy);
+ tipc_nl_bearer_policy, info->extack);
if (err)
return err;
@@ -1104,7 +1104,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX,
info->attrs[TIPC_NLA_MEDIA],
- tipc_nl_media_policy);
+ tipc_nl_media_policy, info->extack);
if (err)
return err;
@@ -1152,7 +1152,7 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX,
info->attrs[TIPC_NLA_MEDIA],
- tipc_nl_media_policy);
+ tipc_nl_media_policy, info->extack);
if (!attrs[TIPC_NLA_MEDIA_NAME])
return -EINVAL;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ddd2dd6f77aa..60820dc35a08 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1827,7 +1827,7 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[])
int err;
err = nla_parse_nested(props, TIPC_NLA_PROP_MAX, prop,
- tipc_nl_prop_policy);
+ tipc_nl_prop_policy, NULL);
if (err)
return err;
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 9be6592e4a6f..bd0aac87b41a 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -416,6 +416,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+ tipc_subscrp_get(s);
list_add(&s->nameseq_list, &nseq->subscriptions);
if (!sseq)
@@ -787,6 +788,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
if (seq != NULL) {
spin_lock_bh(&seq->lock);
list_del_init(&s->nameseq_list);
+ tipc_subscrp_put(s);
if (!seq->first_free && list_empty(&seq->subscriptions)) {
hlist_del_init_rcu(&seq->ns_list);
kfree(seq->sseqs);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index ab8a2d5d1e32..719c5924b638 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -211,8 +211,8 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
- info->attrs[TIPC_NLA_NET],
- tipc_nl_net_policy);
+ info->attrs[TIPC_NLA_NET], tipc_nl_net_policy,
+ info->extack);
if (err)
return err;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 26ca8dd64ded..b76f13f6fea1 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -268,7 +268,8 @@ int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
if (!*attr)
return -EOPNOTSUPP;
- return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy);
+ return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy,
+ NULL);
}
int __init tipc_netlink_start(void)
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index e1ae8a8a2b8e..9bfe886ab330 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -296,7 +296,7 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
err = nla_parse(attrbuf, tipc_genl_family.maxattr,
(const struct nlattr *)trans_buf->data,
- trans_buf->len, NULL);
+ trans_buf->len, NULL, NULL);
if (err)
goto parse_out;
@@ -352,7 +352,7 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX,
- attrs[TIPC_NLA_BEARER], NULL);
+ attrs[TIPC_NLA_BEARER], NULL, NULL);
if (err)
return err;
@@ -472,7 +472,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK],
- NULL);
+ NULL, NULL);
if (err)
return err;
@@ -480,7 +480,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX,
- link[TIPC_NLA_LINK_PROP], NULL);
+ link[TIPC_NLA_LINK_PROP], NULL, NULL);
if (err)
return err;
@@ -488,7 +488,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX,
- link[TIPC_NLA_LINK_STATS], NULL);
+ link[TIPC_NLA_LINK_STATS], NULL, NULL);
if (err)
return err;
@@ -598,7 +598,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK],
- NULL);
+ NULL, NULL);
if (err)
return err;
@@ -795,7 +795,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX,
- attrs[TIPC_NLA_NAME_TABLE], NULL);
+ attrs[TIPC_NLA_NAME_TABLE], NULL, NULL);
if (err)
return err;
@@ -803,7 +803,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX,
- nt[TIPC_NLA_NAME_TABLE_PUBL], NULL);
+ nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, NULL);
if (err)
return err;
@@ -863,7 +863,7 @@ static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL],
- NULL);
+ NULL, NULL);
if (err)
return err;
@@ -929,7 +929,7 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK],
- NULL);
+ NULL, NULL);
if (err)
return err;
@@ -940,8 +940,8 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg,
u32 node;
struct nlattr *con[TIPC_NLA_CON_MAX + 1];
- nla_parse_nested(con, TIPC_NLA_CON_MAX, sock[TIPC_NLA_SOCK_CON],
- NULL);
+ nla_parse_nested(con, TIPC_NLA_CON_MAX,
+ sock[TIPC_NLA_SOCK_CON], NULL, NULL);
node = nla_get_u32(con[TIPC_NLA_CON_NODE]);
tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>",
@@ -977,8 +977,8 @@ static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg,
if (!attrs[TIPC_NLA_MEDIA])
return -EINVAL;
- err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, attrs[TIPC_NLA_MEDIA],
- NULL);
+ err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX,
+ attrs[TIPC_NLA_MEDIA], NULL, NULL);
if (err)
return err;
@@ -998,7 +998,7 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE],
- NULL);
+ NULL, NULL);
if (err)
return err;
@@ -1045,7 +1045,7 @@ static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
err = nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET],
- NULL);
+ NULL, NULL);
if (err)
return err;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 4512e83652b1..aeef8011ac7d 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1607,8 +1607,8 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
- info->attrs[TIPC_NLA_NET],
- tipc_nl_net_policy);
+ info->attrs[TIPC_NLA_NET], tipc_nl_net_policy,
+ info->extack);
if (err)
return err;
@@ -1774,7 +1774,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
info->attrs[TIPC_NLA_LINK],
- tipc_nl_link_policy);
+ tipc_nl_link_policy, info->extack);
if (err)
return err;
@@ -1902,7 +1902,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX,
info->attrs[TIPC_NLA_LINK],
- tipc_nl_link_policy);
+ tipc_nl_link_policy, info->extack);
if (err)
return err;
@@ -2042,7 +2042,7 @@ int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(attrs, TIPC_NLA_MON_MAX,
info->attrs[TIPC_NLA_MON],
- tipc_nl_monitor_policy);
+ tipc_nl_monitor_policy, info->extack);
if (err)
return err;
@@ -2098,6 +2098,8 @@ int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info)
int err;
msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg.skb)
+ return -ENOMEM;
msg.portid = info->snd_portid;
msg.seq = info->snd_seq;
@@ -2163,7 +2165,7 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
err = nla_parse_nested(mon, TIPC_NLA_MON_MAX,
attrs[TIPC_NLA_MON],
- tipc_nl_monitor_policy);
+ tipc_nl_monitor_policy, NULL);
if (err)
return err;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 566906795c8c..8a4e9fe5f9eb 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2511,6 +2511,28 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
}
}
+static int tipc_socketpair(struct socket *sock1, struct socket *sock2)
+{
+ struct tipc_sock *tsk2 = tipc_sk(sock2->sk);
+ struct tipc_sock *tsk1 = tipc_sk(sock1->sk);
+ u32 onode = tipc_own_addr(sock_net(sock1->sk));
+
+ tsk1->peer.family = AF_TIPC;
+ tsk1->peer.addrtype = TIPC_ADDR_ID;
+ tsk1->peer.scope = TIPC_NODE_SCOPE;
+ tsk1->peer.addr.id.ref = tsk2->portid;
+ tsk1->peer.addr.id.node = onode;
+ tsk2->peer.family = AF_TIPC;
+ tsk2->peer.addrtype = TIPC_ADDR_ID;
+ tsk2->peer.scope = TIPC_NODE_SCOPE;
+ tsk2->peer.addr.id.ref = tsk1->portid;
+ tsk2->peer.addr.id.node = onode;
+
+ tipc_sk_finish_conn(tsk1, tsk2->portid, onode);
+ tipc_sk_finish_conn(tsk2, tsk1->portid, onode);
+ return 0;
+}
+
/* Protocol switches for the various types of TIPC sockets */
static const struct proto_ops msg_ops = {
@@ -2519,7 +2541,7 @@ static const struct proto_ops msg_ops = {
.release = tipc_release,
.bind = tipc_bind,
.connect = tipc_connect,
- .socketpair = sock_no_socketpair,
+ .socketpair = tipc_socketpair,
.accept = sock_no_accept,
.getname = tipc_getname,
.poll = tipc_poll,
@@ -2540,7 +2562,7 @@ static const struct proto_ops packet_ops = {
.release = tipc_release,
.bind = tipc_bind,
.connect = tipc_connect,
- .socketpair = sock_no_socketpair,
+ .socketpair = tipc_socketpair,
.accept = tipc_accept,
.getname = tipc_getname,
.poll = tipc_poll,
@@ -2561,7 +2583,7 @@ static const struct proto_ops stream_ops = {
.release = tipc_release,
.bind = tipc_bind,
.connect = tipc_connect,
- .socketpair = sock_no_socketpair,
+ .socketpair = tipc_socketpair,
.accept = tipc_accept,
.getname = tipc_getname,
.poll = tipc_poll,
@@ -2844,7 +2866,7 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
attrs[TIPC_NLA_SOCK],
- tipc_nl_sock_policy);
+ tipc_nl_sock_policy, NULL);
if (err)
return err;
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 271cd66e4b3b..0bf91cd3733c 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -54,8 +54,6 @@ struct tipc_subscriber {
static void tipc_subscrp_delete(struct tipc_subscription *sub);
static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
-static void tipc_subscrp_put(struct tipc_subscription *subscription);
-static void tipc_subscrp_get(struct tipc_subscription *subscription);
/**
* htohl - convert value to endianness used by destination
@@ -125,7 +123,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
{
struct tipc_name_seq seq;
- tipc_subscrp_get(sub);
tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
return;
@@ -135,7 +132,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
node);
- tipc_subscrp_put(sub);
}
static void tipc_subscrp_timeout(unsigned long data)
@@ -145,6 +141,7 @@ static void tipc_subscrp_timeout(unsigned long data)
spin_lock_bh(&subscriber->lock);
tipc_nametbl_unsubscribe(sub);
+ list_del(&sub->subscrp_list);
spin_unlock_bh(&subscriber->lock);
/* Notify subscriber of timeout */
@@ -177,20 +174,17 @@ static void tipc_subscrp_kref_release(struct kref *kref)
struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
struct tipc_subscriber *subscriber = sub->subscriber;
- spin_lock_bh(&subscriber->lock);
- list_del(&sub->subscrp_list);
atomic_dec(&tn->subscription_count);
- spin_unlock_bh(&subscriber->lock);
kfree(sub);
tipc_subscrb_put(subscriber);
}
-static void tipc_subscrp_put(struct tipc_subscription *subscription)
+void tipc_subscrp_put(struct tipc_subscription *subscription)
{
kref_put(&subscription->kref, tipc_subscrp_kref_release);
}
-static void tipc_subscrp_get(struct tipc_subscription *subscription)
+void tipc_subscrp_get(struct tipc_subscription *subscription)
{
kref_get(&subscription->kref);
}
@@ -210,11 +204,8 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
continue;
tipc_nametbl_unsubscribe(sub);
- tipc_subscrp_get(sub);
- spin_unlock_bh(&subscriber->lock);
+ list_del(&sub->subscrp_list);
tipc_subscrp_delete(sub);
- tipc_subscrp_put(sub);
- spin_lock_bh(&subscriber->lock);
if (s)
break;
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index ffdc214c117a..ee52957dc952 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -78,4 +78,7 @@ u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
int tipc_topsrv_start(struct net *net);
void tipc_topsrv_stop(struct net *net);
+void tipc_subscrp_put(struct tipc_subscription *subscription);
+void tipc_subscrp_get(struct tipc_subscription *subscription);
+
#endif
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 46061cf48cd1..ecca64fc6a6f 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -457,7 +457,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX,
attrs[TIPC_NLA_BEARER],
- tipc_nl_bearer_policy);
+ tipc_nl_bearer_policy, NULL);
if (err)
return err;
@@ -609,7 +609,8 @@ int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr)
struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
struct udp_media_addr *dst;
- if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy))
+ if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr,
+ tipc_nl_udp_policy, NULL))
return -EINVAL;
if (!opts[TIPC_NLA_UDP_REMOTE])
@@ -662,7 +663,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX,
attrs[TIPC_NLA_BEARER_UDP_OPTS],
- tipc_nl_udp_policy))
+ tipc_nl_udp_policy, NULL))
goto err;
if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 928691c43408..6a7fe7660551 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -996,7 +996,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
unsigned int hash;
struct unix_address *addr;
struct hlist_head *list;
- struct path path = { NULL, NULL };
+ struct path path = { };
err = -EINVAL;
if (sunaddr->sun_family != AF_UNIX)
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index bc27c70e0e59..09fc2eb29dc8 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -3,7 +3,7 @@ obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
-vsock-y += af_vsock.o vsock_addr.o
+vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
vmci_transport_notify_qstate.o
diff --git a/net/vmw_vsock/af_vsock_tap.c b/net/vmw_vsock/af_vsock_tap.c
new file mode 100644
index 000000000000..98f09b539366
--- /dev/null
+++ b/net/vmw_vsock/af_vsock_tap.c
@@ -0,0 +1,114 @@
+/*
+ * Tap functions for AF_VSOCK sockets.
+ *
+ * Code based on net/netlink/af_netlink.c tap functions.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <net/sock.h>
+#include <net/af_vsock.h>
+#include <linux/if_arp.h>
+
+static DEFINE_SPINLOCK(vsock_tap_lock);
+static struct list_head vsock_tap_all __read_mostly =
+ LIST_HEAD_INIT(vsock_tap_all);
+
+int vsock_add_tap(struct vsock_tap *vt)
+{
+ if (unlikely(vt->dev->type != ARPHRD_VSOCKMON))
+ return -EINVAL;
+
+ __module_get(vt->module);
+
+ spin_lock(&vsock_tap_lock);
+ list_add_rcu(&vt->list, &vsock_tap_all);
+ spin_unlock(&vsock_tap_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vsock_add_tap);
+
+int vsock_remove_tap(struct vsock_tap *vt)
+{
+ struct vsock_tap *tmp;
+ bool found = false;
+
+ spin_lock(&vsock_tap_lock);
+
+ list_for_each_entry(tmp, &vsock_tap_all, list) {
+ if (vt == tmp) {
+ list_del_rcu(&vt->list);
+ found = true;
+ goto out;
+ }
+ }
+
+ pr_warn("vsock_remove_tap: %p not found\n", vt);
+out:
+ spin_unlock(&vsock_tap_lock);
+
+ synchronize_net();
+
+ if (found)
+ module_put(vt->module);
+
+ return found ? 0 : -ENODEV;
+}
+EXPORT_SYMBOL_GPL(vsock_remove_tap);
+
+static int __vsock_deliver_tap_skb(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ int ret = 0;
+ struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+ if (nskb) {
+ dev_hold(dev);
+
+ nskb->dev = dev;
+ ret = dev_queue_xmit(nskb);
+ if (unlikely(ret > 0))
+ ret = net_xmit_errno(ret);
+
+ dev_put(dev);
+ }
+
+ return ret;
+}
+
+static void __vsock_deliver_tap(struct sk_buff *skb)
+{
+ int ret;
+ struct vsock_tap *tmp;
+
+ list_for_each_entry_rcu(tmp, &vsock_tap_all, list) {
+ ret = __vsock_deliver_tap_skb(skb, tmp->dev);
+ if (unlikely(ret))
+ break;
+ }
+}
+
+void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque)
+{
+ struct sk_buff *skb;
+
+ rcu_read_lock();
+
+ if (likely(list_empty(&vsock_tap_all)))
+ goto out;
+
+ skb = build_skb(opaque);
+ if (skb) {
+ __vsock_deliver_tap(skb);
+ consume_skb(skb);
+ }
+
+out:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(vsock_deliver_tap);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 68675a151f22..9dffe0282ad4 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -144,6 +144,8 @@ virtio_transport_send_pkt_work(struct work_struct *work)
list_del_init(&pkt->list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
+ virtio_transport_deliver_tap_pkt(pkt);
+
reply = pkt->reply;
sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
@@ -370,6 +372,7 @@ static void virtio_transport_rx_work(struct work_struct *work)
}
pkt->len = len - sizeof(pkt->hdr);
+ virtio_transport_deliver_tap_pkt(pkt);
virtio_transport_recv_pkt(pkt);
}
} while (!virtqueue_enable_cb(vq));
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index af087b44ceea..18e24793659f 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -16,6 +16,7 @@
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/virtio_vsock.h>
+#include <uapi/linux/vsockmon.h>
#include <net/sock.h>
#include <net/af_vsock.h>
@@ -85,6 +86,69 @@ out_pkt:
return NULL;
}
+/* Packet capture */
+static struct sk_buff *virtio_transport_build_skb(void *opaque)
+{
+ struct virtio_vsock_pkt *pkt = opaque;
+ unsigned char *t_hdr, *payload;
+ struct af_vsockmon_hdr *hdr;
+ struct sk_buff *skb;
+
+ skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len,
+ GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+
+ hdr = (struct af_vsockmon_hdr *)skb_put(skb, sizeof(*hdr));
+
+ /* pkt->hdr is little-endian so no need to byteswap here */
+ hdr->src_cid = pkt->hdr.src_cid;
+ hdr->src_port = pkt->hdr.src_port;
+ hdr->dst_cid = pkt->hdr.dst_cid;
+ hdr->dst_port = pkt->hdr.dst_port;
+
+ hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
+ hdr->len = cpu_to_le16(sizeof(pkt->hdr));
+ memset(hdr->reserved, 0, sizeof(hdr->reserved));
+
+ switch (le16_to_cpu(pkt->hdr.op)) {
+ case VIRTIO_VSOCK_OP_REQUEST:
+ case VIRTIO_VSOCK_OP_RESPONSE:
+ hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
+ break;
+ case VIRTIO_VSOCK_OP_RST:
+ case VIRTIO_VSOCK_OP_SHUTDOWN:
+ hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
+ break;
+ case VIRTIO_VSOCK_OP_RW:
+ hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
+ break;
+ case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
+ case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
+ hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
+ break;
+ default:
+ hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
+ break;
+ }
+
+ t_hdr = skb_put(skb, sizeof(pkt->hdr));
+ memcpy(t_hdr, &pkt->hdr, sizeof(pkt->hdr));
+
+ if (pkt->len) {
+ payload = skb_put(skb, pkt->len);
+ memcpy(payload, pkt->buf, pkt->len);
+ }
+
+ return skb;
+}
+
+void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
+{
+ vsock_deliver_tap(virtio_transport_build_skb, pkt);
+}
+EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
+
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
struct virtio_vsock_pkt_info *info)
{
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 4be4fbbc0b50..10ae7823a19d 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -96,31 +96,23 @@ static int PROTOCOL_OVERRIDE = -1;
static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
{
- int err;
-
switch (vmci_error) {
case VMCI_ERROR_NO_MEM:
- err = ENOMEM;
- break;
+ return -ENOMEM;
case VMCI_ERROR_DUPLICATE_ENTRY:
case VMCI_ERROR_ALREADY_EXISTS:
- err = EADDRINUSE;
- break;
+ return -EADDRINUSE;
case VMCI_ERROR_NO_ACCESS:
- err = EPERM;
- break;
+ return -EPERM;
case VMCI_ERROR_NO_RESOURCES:
- err = ENOBUFS;
- break;
+ return -ENOBUFS;
case VMCI_ERROR_INVALID_RESOURCE:
- err = EHOSTUNREACH;
- break;
+ return -EHOSTUNREACH;
case VMCI_ERROR_INVALID_ARGS:
default:
- err = EINVAL;
+ break;
}
-
- return err > 0 ? -err : err;
+ return -EINVAL;
}
static u32 vmci_transport_peer_rid(u32 peer_cid)
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index bdad1f951561..25666d3009be 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -32,6 +32,11 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
rdev_set_qos_map(rdev, dev, NULL);
if (notify)
nl80211_send_ap_stopped(wdev);
+
+ /* Should we apply the grace period during beaconing interface
+ * shutdown also?
+ */
+ cfg80211_sched_dfs_chan_update(rdev);
}
return err;
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 5497d022fada..b8aa5a7d5c77 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -456,6 +456,123 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy,
return (r1 + r2 > 0);
}
+/*
+ * Checks if center frequency of chan falls with in the bandwidth
+ * range of chandef.
+ */
+bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
+ struct ieee80211_channel *chan)
+{
+ int width;
+ u32 cf_offset, freq;
+
+ if (chandef->chan->center_freq == chan->center_freq)
+ return true;
+
+ width = cfg80211_chandef_get_width(chandef);
+ if (width <= 20)
+ return false;
+
+ cf_offset = width / 2 - 10;
+
+ for (freq = chandef->center_freq1 - width / 2 + 10;
+ freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) {
+ if (chan->center_freq == freq)
+ return true;
+ }
+
+ if (!chandef->center_freq2)
+ return false;
+
+ for (freq = chandef->center_freq2 - width / 2 + 10;
+ freq <= chandef->center_freq2 + width / 2 - 10; freq += 20) {
+ if (chan->center_freq == freq)
+ return true;
+ }
+
+ return false;
+}
+
+bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev)
+{
+ bool active = false;
+
+ ASSERT_WDEV_LOCK(wdev);
+
+ if (!wdev->chandef.chan)
+ return false;
+
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ active = wdev->beacon_interval != 0;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ active = wdev->ssid_len != 0;
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+ active = wdev->mesh_id_len != 0;
+ break;
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_OCB:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ case NL80211_IFTYPE_MONITOR:
+ case NL80211_IFTYPE_AP_VLAN:
+ case NL80211_IFTYPE_WDS:
+ case NL80211_IFTYPE_P2P_DEVICE:
+ /* Can NAN type be considered as beaconing interface? */
+ case NL80211_IFTYPE_NAN:
+ break;
+ case NL80211_IFTYPE_UNSPECIFIED:
+ case NUM_NL80211_IFTYPES:
+ WARN_ON(1);
+ }
+
+ return active;
+}
+
+static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy,
+ struct ieee80211_channel *chan)
+{
+ struct wireless_dev *wdev;
+
+ list_for_each_entry(wdev, &wiphy->wdev_list, list) {
+ wdev_lock(wdev);
+ if (!cfg80211_beaconing_iface_active(wdev)) {
+ wdev_unlock(wdev);
+ continue;
+ }
+
+ if (cfg80211_is_sub_chan(&wdev->chandef, chan)) {
+ wdev_unlock(wdev);
+ return true;
+ }
+ wdev_unlock(wdev);
+ }
+
+ return false;
+}
+
+bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy,
+ struct ieee80211_channel *chan)
+{
+ struct cfg80211_registered_device *rdev;
+
+ ASSERT_RTNL();
+
+ if (!(chan->flags & IEEE80211_CHAN_RADAR))
+ return false;
+
+ list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ if (!reg_dfs_domain_same(wiphy, &rdev->wiphy))
+ continue;
+
+ if (cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan))
+ return true;
+ }
+
+ return false;
+}
static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy,
u32 center_freq,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index e55e05bc4805..b0d6761f0cdd 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -357,6 +357,38 @@ static void cfg80211_sched_scan_stop_wk(struct work_struct *work)
rtnl_unlock();
}
+static void cfg80211_propagate_radar_detect_wk(struct work_struct *work)
+{
+ struct cfg80211_registered_device *rdev;
+
+ rdev = container_of(work, struct cfg80211_registered_device,
+ propagate_radar_detect_wk);
+
+ rtnl_lock();
+
+ regulatory_propagate_dfs_state(&rdev->wiphy, &rdev->radar_chandef,
+ NL80211_DFS_UNAVAILABLE,
+ NL80211_RADAR_DETECTED);
+
+ rtnl_unlock();
+}
+
+static void cfg80211_propagate_cac_done_wk(struct work_struct *work)
+{
+ struct cfg80211_registered_device *rdev;
+
+ rdev = container_of(work, struct cfg80211_registered_device,
+ propagate_cac_done_wk);
+
+ rtnl_lock();
+
+ regulatory_propagate_dfs_state(&rdev->wiphy, &rdev->cac_done_chandef,
+ NL80211_DFS_AVAILABLE,
+ NL80211_RADAR_CAC_FINISHED);
+
+ rtnl_unlock();
+}
+
/* exported functions */
struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
@@ -456,6 +488,9 @@ use_default_name:
spin_lock_init(&rdev->destroy_list_lock);
INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk);
INIT_WORK(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk);
+ INIT_WORK(&rdev->propagate_radar_detect_wk,
+ cfg80211_propagate_radar_detect_wk);
+ INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk);
#ifdef CONFIG_CFG80211_DEFAULT_PS
rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT;
@@ -915,6 +950,8 @@ void wiphy_unregister(struct wiphy *wiphy)
flush_work(&rdev->destroy_work);
flush_work(&rdev->sched_scan_stop_wk);
flush_work(&rdev->mlme_unreg_wk);
+ flush_work(&rdev->propagate_radar_detect_wk);
+ flush_work(&rdev->propagate_cac_done_wk);
#ifdef CONFIG_PM
if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
@@ -954,6 +991,12 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked)
}
EXPORT_SYMBOL(wiphy_rfkill_set_hw_state);
+void cfg80211_cqm_config_free(struct wireless_dev *wdev)
+{
+ kfree(wdev->cqm_config);
+ wdev->cqm_config = NULL;
+}
+
void cfg80211_unregister_wdev(struct wireless_dev *wdev)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -980,6 +1023,8 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
WARN_ON_ONCE(1);
break;
}
+
+ cfg80211_cqm_config_free(wdev);
}
EXPORT_SYMBOL(cfg80211_unregister_wdev);
@@ -1114,7 +1159,15 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
INIT_LIST_HEAD(&wdev->mgmt_registrations);
spin_lock_init(&wdev->mgmt_registrations_lock);
- wdev->identifier = ++rdev->wdev_id;
+ /*
+ * We get here also when the interface changes network namespaces,
+ * as it's registered into the new one, but we don't want it to
+ * change ID in that case. Checking if the ID is already assigned
+ * works, because 0 isn't considered a valid ID and the memory is
+ * 0-initialized.
+ */
+ if (!wdev->identifier)
+ wdev->identifier = ++rdev->wdev_id;
list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list);
rdev->devlist_generation++;
/* can only change netns with wiphy */
@@ -1208,12 +1261,12 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
*/
if ((wdev->iftype == NL80211_IFTYPE_STATION ||
wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
- rdev->ops->set_power_mgmt)
- if (rdev_set_power_mgmt(rdev, dev, wdev->ps,
- wdev->ps_timeout)) {
- /* assume this means it's off */
- wdev->ps = false;
- }
+ rdev->ops->set_power_mgmt &&
+ rdev_set_power_mgmt(rdev, dev, wdev->ps,
+ wdev->ps_timeout)) {
+ /* assume this means it's off */
+ wdev->ps = false;
+ }
break;
case NETDEV_UNREGISTER:
/*
@@ -1234,6 +1287,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
kzfree(wdev->wext.keys);
#endif
flush_work(&wdev->disconnect_wk);
+ cfg80211_cqm_config_free(wdev);
}
/*
* synchronise (so that we won't find this netdev
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 58ca206982fe..5d27eca57d3b 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -97,6 +97,12 @@ struct cfg80211_registered_device {
struct work_struct sched_scan_stop_wk;
+ struct cfg80211_chan_def radar_chandef;
+ struct work_struct propagate_radar_detect_wk;
+
+ struct cfg80211_chan_def cac_done_chandef;
+ struct work_struct propagate_cac_done_wk;
+
/* must be last because of the way we do wiphy_priv(),
* and it should at least be aligned to NETDEV_ALIGN */
struct wiphy wiphy __aligned(NETDEV_ALIGN);
@@ -220,16 +226,7 @@ struct cfg80211_event {
enum cfg80211_event_type type;
union {
- struct {
- u8 bssid[ETH_ALEN];
- const u8 *req_ie;
- const u8 *resp_ie;
- size_t req_ie_len;
- size_t resp_ie_len;
- struct cfg80211_bss *bss;
- int status; /* -1 = failed; 0..65535 = status code */
- enum nl80211_timeout_reason timeout_reason;
- } cr;
+ struct cfg80211_connect_resp_params cr;
struct {
const u8 *req_ie;
const u8 *resp_ie;
@@ -272,6 +269,13 @@ struct cfg80211_iface_destroy {
u32 nlportid;
};
+struct cfg80211_cqm_config {
+ u32 rssi_hyst;
+ s32 last_rssi_event_value;
+ int n_rssi_thresholds;
+ s32 rssi_thresholds[0];
+};
+
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);
/* free object */
@@ -385,12 +389,9 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
struct cfg80211_connect_params *connect,
struct cfg80211_cached_keys *connkeys,
const u8 *prev_bssid);
-void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len,
- int status, bool wextev,
- struct cfg80211_bss *bss,
- enum nl80211_timeout_reason timeout_reason);
+void __cfg80211_connect_result(struct net_device *dev,
+ struct cfg80211_connect_resp_params *params,
+ bool wextev);
void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
size_t ie_len, u16 reason, bool from_ap);
int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
@@ -429,7 +430,7 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev,
void cfg80211_upload_connect_keys(struct wireless_dev *wdev);
int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
struct net_device *dev, enum nl80211_iftype ntype,
- u32 *flags, struct vif_params *params);
+ struct vif_params *params);
void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
void cfg80211_process_wdev_events(struct wireless_dev *wdev);
@@ -459,6 +460,16 @@ unsigned int
cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
const struct cfg80211_chan_def *chandef);
+void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev);
+
+bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy,
+ struct ieee80211_channel *chan);
+
+bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev);
+
+bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
+ struct ieee80211_channel *chan);
+
static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
{
unsigned long end = jiffies;
@@ -512,4 +523,6 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
#define CFG80211_DEV_WARN_ON(cond) ({bool __r = (cond); __r; })
#endif
+void cfg80211_cqm_config_free(struct wireless_dev *wdev);
+
#endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 364f900a3dc4..10bf040a0982 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -190,6 +190,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
if (!nowext)
wdev->wext.ibss.ssid_len = 0;
#endif
+ cfg80211_sched_dfs_chan_update(rdev);
}
void cfg80211_clear_ibss(struct net_device *dev, bool nowext)
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 2d8518a37eab..ec0b1c20ac99 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -262,6 +262,7 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
wdev->beacon_interval = 0;
memset(&wdev->chandef, 0, sizeof(wdev->chandef));
rdev_set_qos_map(rdev, dev, NULL);
+ cfg80211_sched_dfs_chan_update(rdev);
}
return err;
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 22b3d9990065..d8df7a5180a0 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -26,9 +26,16 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
- u8 *ie = mgmt->u.assoc_resp.variable;
- int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable);
- u16 status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
+ struct cfg80211_connect_resp_params cr;
+
+ memset(&cr, 0, sizeof(cr));
+ cr.status = (int)le16_to_cpu(mgmt->u.assoc_resp.status_code);
+ cr.bssid = mgmt->bssid;
+ cr.bss = bss;
+ cr.resp_ie = mgmt->u.assoc_resp.variable;
+ cr.resp_ie_len =
+ len - offsetof(struct ieee80211_mgmt, u.assoc_resp.variable);
+ cr.timeout_reason = NL80211_TIMEOUT_UNSPECIFIED;
trace_cfg80211_send_rx_assoc(dev, bss);
@@ -38,7 +45,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
* and got a reject -- we only try again with an assoc
* frame instead of reassoc.
*/
- if (cfg80211_sme_rx_assoc_resp(wdev, status_code)) {
+ if (cfg80211_sme_rx_assoc_resp(wdev, cr.status)) {
cfg80211_unhold_bss(bss_from_pub(bss));
cfg80211_put_bss(wiphy, bss);
return;
@@ -46,10 +53,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL, uapsd_queues);
/* update current_bss etc., consumes the bss reference */
- __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs,
- status_code,
- status_code == WLAN_STATUS_SUCCESS, bss,
- NL80211_TIMEOUT_UNSPECIFIED);
+ __cfg80211_connect_result(dev, &cr, cr.status == WLAN_STATUS_SUCCESS);
}
EXPORT_SYMBOL(cfg80211_rx_assoc_resp);
@@ -745,6 +749,12 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
}
EXPORT_SYMBOL(cfg80211_rx_mgmt);
+void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev)
+{
+ cancel_delayed_work(&rdev->dfs_update_channels_wk);
+ queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, 0);
+}
+
void cfg80211_dfs_channels_update_work(struct work_struct *work)
{
struct delayed_work *delayed_work = to_delayed_work(work);
@@ -755,6 +765,8 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work)
struct wiphy *wiphy;
bool check_again = false;
unsigned long timeout, next_time = 0;
+ unsigned long time_dfs_update;
+ enum nl80211_radar_event radar_event;
int bandid, i;
rdev = container_of(delayed_work, struct cfg80211_registered_device,
@@ -770,11 +782,27 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work)
for (i = 0; i < sband->n_channels; i++) {
c = &sband->channels[i];
- if (c->dfs_state != NL80211_DFS_UNAVAILABLE)
+ if (!(c->flags & IEEE80211_CHAN_RADAR))
+ continue;
+
+ if (c->dfs_state != NL80211_DFS_UNAVAILABLE &&
+ c->dfs_state != NL80211_DFS_AVAILABLE)
continue;
- timeout = c->dfs_state_entered + msecs_to_jiffies(
- IEEE80211_DFS_MIN_NOP_TIME_MS);
+ if (c->dfs_state == NL80211_DFS_UNAVAILABLE) {
+ time_dfs_update = IEEE80211_DFS_MIN_NOP_TIME_MS;
+ radar_event = NL80211_RADAR_NOP_FINISHED;
+ } else {
+ if (regulatory_pre_cac_allowed(wiphy) ||
+ cfg80211_any_wiphy_oper_chan(wiphy, c))
+ continue;
+
+ time_dfs_update = REG_PRE_CAC_EXPIRY_GRACE_MS;
+ radar_event = NL80211_RADAR_PRE_CAC_EXPIRED;
+ }
+
+ timeout = c->dfs_state_entered +
+ msecs_to_jiffies(time_dfs_update);
if (time_after_eq(jiffies, timeout)) {
c->dfs_state = NL80211_DFS_USABLE;
@@ -784,8 +812,12 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work)
NL80211_CHAN_NO_HT);
nl80211_radar_notify(rdev, &chandef,
- NL80211_RADAR_NOP_FINISHED,
- NULL, GFP_ATOMIC);
+ radar_event, NULL,
+ GFP_ATOMIC);
+
+ regulatory_propagate_dfs_state(wiphy, &chandef,
+ c->dfs_state,
+ radar_event);
continue;
}
@@ -810,7 +842,6 @@ void cfg80211_radar_event(struct wiphy *wiphy,
gfp_t gfp)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- unsigned long timeout;
trace_cfg80211_radar_event(wiphy, chandef);
@@ -820,11 +851,12 @@ void cfg80211_radar_event(struct wiphy *wiphy,
*/
cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE);
- timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_NOP_TIME_MS);
- queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk,
- timeout);
+ cfg80211_sched_dfs_chan_update(rdev);
nl80211_radar_notify(rdev, chandef, NL80211_RADAR_DETECTED, NULL, gfp);
+
+ memcpy(&rdev->radar_chandef, chandef, sizeof(struct cfg80211_chan_def));
+ queue_work(cfg80211_wq, &rdev->propagate_radar_detect_wk);
}
EXPORT_SYMBOL(cfg80211_radar_event);
@@ -851,6 +883,10 @@ void cfg80211_cac_event(struct net_device *netdev,
msecs_to_jiffies(wdev->cac_time_ms);
WARN_ON(!time_after_eq(jiffies, timeout));
cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE);
+ memcpy(&rdev->cac_done_chandef, chandef,
+ sizeof(struct cfg80211_chan_def));
+ queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk);
+ cfg80211_sched_dfs_chan_update(rdev);
break;
case NL80211_RADAR_CAC_ABORTED:
break;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2312dc2ffdb9..50c35affccad 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -410,6 +410,15 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
.len = sizeof(struct nl80211_bss_select_rssi_adjust)
},
[NL80211_ATTR_TIMEOUT_REASON] = { .type = NLA_U32 },
+ [NL80211_ATTR_FILS_ERP_USERNAME] = { .type = NLA_BINARY,
+ .len = FILS_ERP_MAX_USERNAME_LEN },
+ [NL80211_ATTR_FILS_ERP_REALM] = { .type = NLA_BINARY,
+ .len = FILS_ERP_MAX_REALM_LEN },
+ [NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] = { .type = NLA_U16 },
+ [NL80211_ATTR_FILS_ERP_RRK] = { .type = NLA_BINARY,
+ .len = FILS_ERP_MAX_RRK_LEN },
+ [NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 },
+ [NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN },
};
/* policy for the key attributes */
@@ -548,7 +557,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
if (!cb->args[0]) {
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
genl_family_attrbuf(&nl80211_fam),
- nl80211_fam.maxattr, nl80211_policy);
+ nl80211_fam.maxattr, nl80211_policy, NULL);
if (err)
return err;
@@ -719,7 +728,7 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
{
struct nlattr *tb[NL80211_KEY_MAX + 1];
int err = nla_parse_nested(tb, NL80211_KEY_MAX, key,
- nl80211_key_policy);
+ nl80211_key_policy, NULL);
if (err)
return err;
@@ -760,7 +769,7 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
err = nla_parse_nested(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1,
tb[NL80211_KEY_DEFAULT_TYPES],
- nl80211_key_default_policy);
+ nl80211_key_default_policy, NULL);
if (err)
return err;
@@ -807,10 +816,11 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k)
if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) {
struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];
- int err = nla_parse_nested(
- kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1,
- info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES],
- nl80211_key_default_policy);
+ int err = nla_parse_nested(kdt,
+ NUM_NL80211_KEY_DEFAULT_TYPES - 1,
+ info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES],
+ nl80211_key_default_policy,
+ info->extack);
if (err)
return err;
@@ -1892,8 +1902,8 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb,
struct nl80211_dump_wiphy_state *state)
{
struct nlattr **tb = genl_family_attrbuf(&nl80211_fam);
- int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- tb, nl80211_fam.maxattr, nl80211_policy);
+ int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb,
+ nl80211_fam.maxattr, nl80211_policy, NULL);
/* ignore parse errors for backward compatibility */
if (ret)
return 0;
@@ -2308,7 +2318,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
rem_txq_params) {
result = nla_parse_nested(tb, NL80211_TXQ_ATTR_MAX,
nl_txq_params,
- txq_params_policy);
+ txq_params_policy,
+ info->extack);
if (result)
return result;
result = parse_txq_params(tb, &txq_params);
@@ -2695,17 +2706,82 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags)
if (!nla)
return -EINVAL;
- if (nla_parse_nested(flags, NL80211_MNTR_FLAG_MAX,
- nla, mntr_flags_policy))
+ if (nla_parse_nested(flags, NL80211_MNTR_FLAG_MAX, nla,
+ mntr_flags_policy, NULL))
return -EINVAL;
for (flag = 1; flag <= NL80211_MNTR_FLAG_MAX; flag++)
if (flags[flag])
*mntrflags |= (1<<flag);
+ *mntrflags |= MONITOR_FLAG_CHANGED;
+
return 0;
}
+static int nl80211_parse_mon_options(struct cfg80211_registered_device *rdev,
+ enum nl80211_iftype type,
+ struct genl_info *info,
+ struct vif_params *params)
+{
+ bool change = false;
+ int err;
+
+ if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) {
+ if (type != NL80211_IFTYPE_MONITOR)
+ return -EINVAL;
+
+ err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS],
+ &params->flags);
+ if (err)
+ return err;
+
+ change = true;
+ }
+
+ if (params->flags & MONITOR_FLAG_ACTIVE &&
+ !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
+ return -EOPNOTSUPP;
+
+ if (info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]) {
+ const u8 *mumimo_groups;
+ u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
+
+ if (type != NL80211_IFTYPE_MONITOR)
+ return -EINVAL;
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag))
+ return -EOPNOTSUPP;
+
+ mumimo_groups =
+ nla_data(info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]);
+
+ /* bits 0 and 63 are reserved and must be zero */
+ if ((mumimo_groups[0] & BIT(7)) ||
+ (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(0)))
+ return -EINVAL;
+
+ params->vht_mumimo_groups = mumimo_groups;
+ change = true;
+ }
+
+ if (info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]) {
+ u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
+
+ if (type != NL80211_IFTYPE_MONITOR)
+ return -EINVAL;
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag))
+ return -EOPNOTSUPP;
+
+ params->vht_mumimo_follow_addr =
+ nla_data(info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]);
+ change = true;
+ }
+
+ return change ? 1 : 0;
+}
+
static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev,
struct net_device *netdev, u8 use_4addr,
enum nl80211_iftype iftype)
@@ -2739,7 +2815,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
int err;
enum nl80211_iftype otype, ntype;
struct net_device *dev = info->user_ptr[1];
- u32 _flags, *flags = NULL;
bool change = false;
memset(&params, 0, sizeof(params));
@@ -2782,56 +2857,14 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
params.use_4addr = -1;
}
- if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) {
- if (ntype != NL80211_IFTYPE_MONITOR)
- return -EINVAL;
- err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS],
- &_flags);
- if (err)
- return err;
-
- flags = &_flags;
- change = true;
- }
-
- if (info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]) {
- const u8 *mumimo_groups;
- u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
-
- if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag))
- return -EOPNOTSUPP;
-
- mumimo_groups =
- nla_data(info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]);
-
- /* bits 0 and 63 are reserved and must be zero */
- if ((mumimo_groups[0] & BIT(7)) ||
- (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(0)))
- return -EINVAL;
-
- memcpy(params.vht_mumimo_groups, mumimo_groups,
- VHT_MUMIMO_GROUPS_DATA_LEN);
- change = true;
- }
-
- if (info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]) {
- u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
-
- if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag))
- return -EOPNOTSUPP;
-
- nla_memcpy(params.macaddr,
- info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR],
- ETH_ALEN);
+ err = nl80211_parse_mon_options(rdev, ntype, info, &params);
+ if (err < 0)
+ return err;
+ if (err > 0)
change = true;
- }
-
- if (flags && (*flags & MONITOR_FLAG_ACTIVE) &&
- !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
- return -EOPNOTSUPP;
if (change)
- err = cfg80211_change_iface(rdev, dev, ntype, flags, &params);
+ err = cfg80211_change_iface(rdev, dev, ntype, &params);
else
err = 0;
@@ -2849,7 +2882,6 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg;
int err;
enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
- u32 flags;
/* to avoid failing a new interface creation due to pending removal */
cfg80211_destroy_ifaces(rdev);
@@ -2885,13 +2917,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
return err;
}
- err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ?
- info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL,
- &flags);
-
- if (!err && (flags & MONITOR_FLAG_ACTIVE) &&
- !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
- return -EOPNOTSUPP;
+ err = nl80211_parse_mon_options(rdev, type, info, &params);
+ if (err < 0)
+ return err;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -2899,8 +2927,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
wdev = rdev_add_virtual_intf(rdev,
nla_data(info->attrs[NL80211_ATTR_IFNAME]),
- NET_NAME_USER, type, err ? NULL : &flags,
- &params);
+ NET_NAME_USER, type, &params);
if (WARN_ON(!wdev)) {
nlmsg_free(msg);
return -EPROTO;
@@ -3561,7 +3588,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
if (sband == NULL)
return -EINVAL;
err = nla_parse_nested(tb, NL80211_TXRATE_MAX, tx_rates,
- nl80211_txattr_policy);
+ nl80211_txattr_policy, info->extack);
if (err)
return err;
if (tb[NL80211_TXRATE_LEGACY]) {
@@ -3818,6 +3845,19 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
return false;
return true;
case NL80211_CMD_CONNECT:
+ /* SAE not supported yet */
+ if (auth_type == NL80211_AUTHTYPE_SAE)
+ return false;
+ /* FILS with SK PFS or PK not supported yet */
+ if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
+ auth_type == NL80211_AUTHTYPE_FILS_PK)
+ return false;
+ if (!wiphy_ext_feature_isset(
+ &rdev->wiphy,
+ NL80211_EXT_FEATURE_FILS_SK_OFFLOAD) &&
+ auth_type == NL80211_AUTHTYPE_FILS_SK)
+ return false;
+ return true;
case NL80211_CMD_START_AP:
/* SAE not supported yet */
if (auth_type == NL80211_AUTHTYPE_SAE)
@@ -4100,8 +4140,8 @@ static int parse_station_flags(struct genl_info *info,
if (!nla)
return 0;
- if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX,
- nla, sta_flags_policy))
+ if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX, nla,
+ sta_flags_policy, info->extack))
return -EINVAL;
/*
@@ -4151,7 +4191,7 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
struct nlattr *rate;
u32 bitrate;
u16 bitrate_compat;
- enum nl80211_attrs rate_flg;
+ enum nl80211_rate_info rate_flg;
rate = nla_nest_start(msg, attr);
if (!rate)
@@ -4728,7 +4768,7 @@ static int nl80211_parse_sta_wme(struct genl_info *info,
nla = info->attrs[NL80211_ATTR_STA_WME];
err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla,
- nl80211_sta_wme_policy);
+ nl80211_sta_wme_policy, info->extack);
if (err)
return err;
@@ -5703,7 +5743,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
cur_params.dot11MeshGateAnnouncementProtocol) ||
nla_put_u8(msg, NL80211_MESHCONF_FORWARDING,
cur_params.dot11MeshForwarding) ||
- nla_put_u32(msg, NL80211_MESHCONF_RSSI_THRESHOLD,
+ nla_put_s32(msg, NL80211_MESHCONF_RSSI_THRESHOLD,
cur_params.rssi_threshold) ||
nla_put_u32(msg, NL80211_MESHCONF_HT_OPMODE,
cur_params.ht_opmode) ||
@@ -5853,7 +5893,7 @@ do { \
return -EINVAL;
if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX,
info->attrs[NL80211_ATTR_MESH_CONFIG],
- nl80211_meshconf_params_policy))
+ nl80211_meshconf_params_policy, info->extack))
return -EINVAL;
/* This makes sure that there aren't more than 32 mesh config
@@ -6002,7 +6042,7 @@ static int nl80211_parse_mesh_setup(struct genl_info *info,
return -EINVAL;
if (nla_parse_nested(tb, NL80211_MESH_SETUP_ATTR_MAX,
info->attrs[NL80211_ATTR_MESH_SETUP],
- nl80211_mesh_setup_params_policy))
+ nl80211_mesh_setup_params_policy, info->extack))
return -EINVAL;
if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC])
@@ -6393,7 +6433,8 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
rem_reg_rules) {
r = nla_parse_nested(tb, NL80211_REG_RULE_ATTR_MAX,
- nl_reg_rule, reg_rule_policy);
+ nl_reg_rule, reg_rule_policy,
+ info->extack);
if (r)
goto bad_reg;
r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]);
@@ -6461,7 +6502,7 @@ static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy,
return -EINVAL;
err = nla_parse_nested(attr, NL80211_BSS_SELECT_ATTR_MAX, nest,
- nl80211_bss_select_policy);
+ nl80211_bss_select_policy, NULL);
if (err)
return err;
@@ -6545,6 +6586,19 @@ static int nl80211_parse_random_mac(struct nlattr **attrs,
return 0;
}
+static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev)
+{
+ ASSERT_WDEV_LOCK(wdev);
+
+ if (!cfg80211_beaconing_iface_active(wdev))
+ return true;
+
+ if (!(wdev->chandef.chan->flags & IEEE80211_CHAN_RADAR))
+ return true;
+
+ return regulatory_pre_cac_allowed(wdev->wiphy);
+}
+
static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -6670,6 +6724,25 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
request->n_channels = i;
+ wdev_lock(wdev);
+ if (!cfg80211_off_channel_oper_allowed(wdev)) {
+ struct ieee80211_channel *chan;
+
+ if (request->n_channels != 1) {
+ wdev_unlock(wdev);
+ err = -EBUSY;
+ goto out_free;
+ }
+
+ chan = request->channels[0];
+ if (chan->center_freq != wdev->chandef.chan->center_freq) {
+ wdev_unlock(wdev);
+ err = -EBUSY;
+ goto out_free;
+ }
+ }
+ wdev_unlock(wdev);
+
i = 0;
if (n_ssids) {
nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) {
@@ -6862,7 +6935,7 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans,
return -EINVAL;
err = nla_parse_nested(plan, NL80211_SCHED_SCAN_PLAN_MAX,
- attr, nl80211_plan_policy);
+ attr, nl80211_plan_policy, NULL);
if (err)
return err;
@@ -6953,7 +7026,8 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
err = nla_parse_nested(tb,
NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
- attr, nl80211_match_policy);
+ attr, nl80211_match_policy,
+ NULL);
if (err)
return ERR_PTR(err);
/* add other standalone attributes here */
@@ -7132,7 +7206,8 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
err = nla_parse_nested(tb,
NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
- attr, nl80211_match_policy);
+ attr, nl80211_match_policy,
+ NULL);
if (err)
goto out_free;
ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID];
@@ -7290,8 +7365,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
rcu_assign_pointer(rdev->sched_scan_req, sched_scan_req);
- nl80211_send_sched_scan(rdev, dev,
- NL80211_CMD_START_SCHED_SCAN);
+ nl80211_send_sched_scan(sched_scan_req, NL80211_CMD_START_SCHED_SCAN);
return 0;
out_free:
@@ -7433,7 +7507,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(csa_attrs, NL80211_ATTR_MAX,
info->attrs[NL80211_ATTR_CSA_IES],
- nl80211_policy);
+ nl80211_policy, info->extack);
if (err)
return err;
@@ -8639,7 +8713,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- attrbuf, nl80211_fam.maxattr, nl80211_policy);
+ attrbuf, nl80211_fam.maxattr,
+ nl80211_policy, NULL);
if (err)
goto out_err;
@@ -8867,6 +8942,35 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
}
}
+ if (wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_FILS_SK_OFFLOAD) &&
+ info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] &&
+ info->attrs[NL80211_ATTR_FILS_ERP_REALM] &&
+ info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] &&
+ info->attrs[NL80211_ATTR_FILS_ERP_RRK]) {
+ connect.fils_erp_username =
+ nla_data(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]);
+ connect.fils_erp_username_len =
+ nla_len(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]);
+ connect.fils_erp_realm =
+ nla_data(info->attrs[NL80211_ATTR_FILS_ERP_REALM]);
+ connect.fils_erp_realm_len =
+ nla_len(info->attrs[NL80211_ATTR_FILS_ERP_REALM]);
+ connect.fils_erp_next_seq_num =
+ nla_get_u16(
+ info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM]);
+ connect.fils_erp_rrk =
+ nla_data(info->attrs[NL80211_ATTR_FILS_ERP_RRK]);
+ connect.fils_erp_rrk_len =
+ nla_len(info->attrs[NL80211_ATTR_FILS_ERP_RRK]);
+ } else if (info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] ||
+ info->attrs[NL80211_ATTR_FILS_ERP_REALM] ||
+ info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] ||
+ info->attrs[NL80211_ATTR_FILS_ERP_RRK]) {
+ kzfree(connkeys);
+ return -EINVAL;
+ }
+
wdev_lock(dev->ieee80211_ptr);
err = cfg80211_connect(rdev, dev, &connect, connkeys,
@@ -8986,14 +9090,28 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
memset(&pmksa, 0, sizeof(struct cfg80211_pmksa));
- if (!info->attrs[NL80211_ATTR_MAC])
- return -EINVAL;
-
if (!info->attrs[NL80211_ATTR_PMKID])
return -EINVAL;
pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);
- pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+ if (info->attrs[NL80211_ATTR_MAC]) {
+ pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ } else if (info->attrs[NL80211_ATTR_SSID] &&
+ info->attrs[NL80211_ATTR_FILS_CACHE_ID] &&
+ (info->genlhdr->cmd == NL80211_CMD_DEL_PMKSA ||
+ info->attrs[NL80211_ATTR_PMK])) {
+ pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
+ pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+ pmksa.cache_id =
+ nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]);
+ } else {
+ return -EINVAL;
+ }
+ if (info->attrs[NL80211_ATTR_PMK]) {
+ pmksa.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]);
+ pmksa.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]);
+ }
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
@@ -9096,6 +9214,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct wireless_dev *wdev = info->user_ptr[1];
struct cfg80211_chan_def chandef;
+ const struct cfg80211_chan_def *compat_chandef;
struct sk_buff *msg;
void *hdr;
u64 cookie;
@@ -9124,6 +9243,18 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
if (err)
return err;
+ wdev_lock(wdev);
+ if (!cfg80211_off_channel_oper_allowed(wdev) &&
+ !cfg80211_chandef_identical(&wdev->chandef, &chandef)) {
+ compat_chandef = cfg80211_chandef_compatible(&wdev->chandef,
+ &chandef);
+ if (compat_chandef != &chandef) {
+ wdev_unlock(wdev);
+ return -EBUSY;
+ }
+ }
+ wdev_unlock(wdev);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -9299,6 +9430,13 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
if (!chandef.chan && params.offchan)
return -EINVAL;
+ wdev_lock(wdev);
+ if (params.offchan && !cfg80211_off_channel_oper_allowed(wdev)) {
+ wdev_unlock(wdev);
+ return -EBUSY;
+ }
+ wdev_unlock(wdev);
+
params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
@@ -9466,7 +9604,7 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info)
static const struct nla_policy
nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] = {
- [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_U32 },
+ [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_BINARY },
[NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 },
[NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 },
[NL80211_ATTR_CQM_TXE_RATE] = { .type = NLA_U32 },
@@ -9495,28 +9633,123 @@ static int nl80211_set_cqm_txe(struct genl_info *info,
return rdev_set_cqm_txe_config(rdev, dev, rate, pkts, intvl);
}
+static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
+ struct net_device *dev)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ s32 last, low, high;
+ u32 hyst;
+ int i, n;
+ int err;
+
+ /* RSSI reporting disabled? */
+ if (!wdev->cqm_config)
+ return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);
+
+ /*
+ * Obtain current RSSI value if possible, if not and no RSSI threshold
+ * event has been received yet, we should receive an event after a
+ * connection is established and enough beacons received to calculate
+ * the average.
+ */
+ if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss &&
+ rdev->ops->get_station) {
+ struct station_info sinfo;
+ u8 *mac_addr;
+
+ mac_addr = wdev->current_bss->pub.bssid;
+
+ err = rdev_get_station(rdev, dev, mac_addr, &sinfo);
+ if (err)
+ return err;
+
+ if (sinfo.filled & BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG))
+ wdev->cqm_config->last_rssi_event_value =
+ (s8) sinfo.rx_beacon_signal_avg;
+ }
+
+ last = wdev->cqm_config->last_rssi_event_value;
+ hyst = wdev->cqm_config->rssi_hyst;
+ n = wdev->cqm_config->n_rssi_thresholds;
+
+ for (i = 0; i < n; i++)
+ if (last < wdev->cqm_config->rssi_thresholds[i])
+ break;
+
+ low = i > 0 ?
+ (wdev->cqm_config->rssi_thresholds[i - 1] - hyst) : S32_MIN;
+ high = i < n ?
+ (wdev->cqm_config->rssi_thresholds[i] + hyst - 1) : S32_MAX;
+
+ return rdev_set_cqm_rssi_range_config(rdev, dev, low, high);
+}
+
static int nl80211_set_cqm_rssi(struct genl_info *info,
- s32 threshold, u32 hysteresis)
+ const s32 *thresholds, int n_thresholds,
+ u32 hysteresis)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ int i, err;
+ s32 prev = S32_MIN;
- if (threshold > 0)
- return -EINVAL;
-
- /* disabling - hysteresis should also be zero then */
- if (threshold == 0)
- hysteresis = 0;
+ /* Check all values negative and sorted */
+ for (i = 0; i < n_thresholds; i++) {
+ if (thresholds[i] > 0 || thresholds[i] <= prev)
+ return -EINVAL;
- if (!rdev->ops->set_cqm_rssi_config)
- return -EOPNOTSUPP;
+ prev = thresholds[i];
+ }
if (wdev->iftype != NL80211_IFTYPE_STATION &&
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
return -EOPNOTSUPP;
- return rdev_set_cqm_rssi_config(rdev, dev, threshold, hysteresis);
+ wdev_lock(wdev);
+ cfg80211_cqm_config_free(wdev);
+ wdev_unlock(wdev);
+
+ if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) {
+ if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */
+ return rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
+
+ return rdev_set_cqm_rssi_config(rdev, dev,
+ thresholds[0], hysteresis);
+ }
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_CQM_RSSI_LIST))
+ return -EOPNOTSUPP;
+
+ if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */
+ n_thresholds = 0;
+
+ wdev_lock(wdev);
+ if (n_thresholds) {
+ struct cfg80211_cqm_config *cqm_config;
+
+ cqm_config = kzalloc(sizeof(struct cfg80211_cqm_config) +
+ n_thresholds * sizeof(s32), GFP_KERNEL);
+ if (!cqm_config) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ cqm_config->rssi_hyst = hysteresis;
+ cqm_config->n_rssi_thresholds = n_thresholds;
+ memcpy(cqm_config->rssi_thresholds, thresholds,
+ n_thresholds * sizeof(s32));
+
+ wdev->cqm_config = cqm_config;
+ }
+
+ err = cfg80211_cqm_rssi_update(rdev, dev);
+
+unlock:
+ wdev_unlock(wdev);
+
+ return err;
}
static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
@@ -9530,16 +9763,22 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm,
- nl80211_attr_cqm_policy);
+ nl80211_attr_cqm_policy, info->extack);
if (err)
return err;
if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] &&
attrs[NL80211_ATTR_CQM_RSSI_HYST]) {
- s32 threshold = nla_get_s32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
+ const s32 *thresholds =
+ nla_data(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
+ int len = nla_len(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
u32 hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);
- return nl80211_set_cqm_rssi(info, threshold, hysteresis);
+ if (len % 4)
+ return -EINVAL;
+
+ return nl80211_set_cqm_rssi(info, thresholds, len / 4,
+ hysteresis);
}
if (attrs[NL80211_ATTR_CQM_TXE_RATE] &&
@@ -9940,7 +10179,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
return -EINVAL;
err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TCP, attr,
- nl80211_wowlan_tcp_policy);
+ nl80211_wowlan_tcp_policy, NULL);
if (err)
return err;
@@ -10085,7 +10324,8 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev,
goto out;
}
- err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy);
+ err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy,
+ NULL);
if (err)
goto out;
@@ -10122,7 +10362,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TRIG,
info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS],
- nl80211_wowlan_policy);
+ nl80211_wowlan_policy, info->extack);
if (err)
return err;
@@ -10205,7 +10445,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
u8 *mask_pat;
nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
- NULL);
+ NULL, info->extack);
err = -EINVAL;
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
@@ -10416,7 +10656,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule,
- nl80211_coalesce_policy);
+ nl80211_coalesce_policy, NULL);
if (err)
return err;
@@ -10454,7 +10694,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
rem) {
u8 *mask_pat;
- nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL);
+ nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL, NULL);
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
return -EINVAL;
@@ -10575,7 +10815,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
err = nla_parse_nested(tb, MAX_NL80211_REKEY_DATA,
info->attrs[NL80211_ATTR_REKEY_DATA],
- nl80211_rekey_policy);
+ nl80211_rekey_policy, info->extack);
if (err)
return err;
@@ -10892,7 +11132,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX,
info->attrs[NL80211_ATTR_NAN_FUNC],
- nl80211_nan_func_policy);
+ nl80211_nan_func_policy, info->extack);
if (err)
return err;
@@ -10989,7 +11229,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
err = nla_parse_nested(srf_tb, NL80211_NAN_SRF_ATTR_MAX,
tb[NL80211_NAN_FUNC_SRF],
- nl80211_nan_srf_policy);
+ nl80211_nan_srf_policy, info->extack);
if (err)
goto out;
@@ -11524,8 +11764,8 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
return 0;
}
- err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- attrbuf, nl80211_fam.maxattr, nl80211_policy);
+ err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf,
+ nl80211_fam.maxattr, nl80211_policy, NULL);
if (err)
return err;
@@ -12970,18 +13210,19 @@ static int nl80211_prep_scan_msg(struct sk_buff *msg,
static int
nl80211_prep_sched_scan_msg(struct sk_buff *msg,
- struct cfg80211_registered_device *rdev,
- struct net_device *netdev,
- u32 portid, u32 seq, int flags, u32 cmd)
+ struct cfg80211_sched_scan_request *req, u32 cmd)
{
void *hdr;
- hdr = nl80211hdr_put(msg, portid, seq, flags, cmd);
+ hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
if (!hdr)
return -1;
- if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
- nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex))
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY,
+ wiphy_to_rdev(req->wiphy)->wiphy_idx) ||
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, req->dev->ifindex) ||
+ nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, req->reqid,
+ NL80211_ATTR_PAD))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -13041,8 +13282,7 @@ void nl80211_send_scan_msg(struct cfg80211_registered_device *rdev,
NL80211_MCGRP_SCAN, GFP_KERNEL);
}
-void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, u32 cmd)
+void nl80211_send_sched_scan(struct cfg80211_sched_scan_request *req, u32 cmd)
{
struct sk_buff *msg;
@@ -13050,12 +13290,12 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
if (!msg)
return;
- if (nl80211_prep_sched_scan_msg(msg, rdev, netdev, 0, 0, 0, cmd) < 0) {
+ if (nl80211_prep_sched_scan_msg(msg, req, cmd) < 0) {
nlmsg_free(msg);
return;
}
- genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(req->wiphy), msg, 0,
NL80211_MCGRP_SCAN, GFP_KERNEL);
}
@@ -13296,17 +13536,16 @@ void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev,
}
void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *bssid,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len,
- int status,
- enum nl80211_timeout_reason timeout_reason,
+ struct net_device *netdev,
+ struct cfg80211_connect_resp_params *cr,
gfp_t gfp)
{
struct sk_buff *msg;
void *hdr;
- msg = nlmsg_new(100 + req_ie_len + resp_ie_len, gfp);
+ msg = nlmsg_new(100 + cr->req_ie_len + cr->resp_ie_len +
+ cr->fils_kek_len + cr->pmk_len +
+ (cr->pmkid ? WLAN_PMKID_LEN : 0), gfp);
if (!msg)
return;
@@ -13318,17 +13557,31 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
- (bssid && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) ||
+ (cr->bssid &&
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, cr->bssid)) ||
nla_put_u16(msg, NL80211_ATTR_STATUS_CODE,
- status < 0 ? WLAN_STATUS_UNSPECIFIED_FAILURE :
- status) ||
- (status < 0 &&
+ cr->status < 0 ? WLAN_STATUS_UNSPECIFIED_FAILURE :
+ cr->status) ||
+ (cr->status < 0 &&
(nla_put_flag(msg, NL80211_ATTR_TIMED_OUT) ||
- nla_put_u32(msg, NL80211_ATTR_TIMEOUT_REASON, timeout_reason))) ||
- (req_ie &&
- nla_put(msg, NL80211_ATTR_REQ_IE, req_ie_len, req_ie)) ||
- (resp_ie &&
- nla_put(msg, NL80211_ATTR_RESP_IE, resp_ie_len, resp_ie)))
+ nla_put_u32(msg, NL80211_ATTR_TIMEOUT_REASON,
+ cr->timeout_reason))) ||
+ (cr->req_ie &&
+ nla_put(msg, NL80211_ATTR_REQ_IE, cr->req_ie_len, cr->req_ie)) ||
+ (cr->resp_ie &&
+ nla_put(msg, NL80211_ATTR_RESP_IE, cr->resp_ie_len,
+ cr->resp_ie)) ||
+ (cr->update_erp_next_seq_num &&
+ nla_put_u16(msg, NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM,
+ cr->fils_erp_next_seq_num)) ||
+ (cr->status == WLAN_STATUS_SUCCESS &&
+ ((cr->fils_kek &&
+ nla_put(msg, NL80211_ATTR_FILS_KEK, cr->fils_kek_len,
+ cr->fils_kek)) ||
+ (cr->pmk &&
+ nla_put(msg, NL80211_ATTR_PMK, cr->pmk_len, cr->pmk)) ||
+ (cr->pmkid &&
+ nla_put(msg, NL80211_ATTR_PMKID, WLAN_PMKID_LEN, cr->pmkid)))))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -13968,6 +14221,8 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
s32 rssi_level, gfp_t gfp)
{
struct sk_buff *msg;
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level);
@@ -13975,6 +14230,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH))
return;
+ if (wdev->cqm_config) {
+ wdev->cqm_config->last_rssi_event_value = rssi_level;
+
+ cfg80211_cqm_rssi_update(rdev, dev);
+
+ if (rssi_level == 0)
+ rssi_level = wdev->cqm_config->last_rssi_event_value;
+ }
+
msg = cfg80211_prepare_cqm(dev, NULL, gfp);
if (!msg)
return;
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index e488dca87423..d5f6860e62ab 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -16,8 +16,7 @@ struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev, bool aborted);
void nl80211_send_scan_msg(struct cfg80211_registered_device *rdev,
struct sk_buff *msg);
-void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, u32 cmd);
+void nl80211_send_sched_scan(struct cfg80211_sched_scan_request *req, u32 cmd);
void nl80211_common_reg_change_event(enum nl80211_commands cmd_id,
struct regulatory_request *request);
@@ -53,11 +52,8 @@ void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
const u8 *addr, gfp_t gfp);
void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *bssid,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len,
- int status,
- enum nl80211_timeout_reason timeout_reason,
+ struct net_device *netdev,
+ struct cfg80211_connect_resp_params *params,
gfp_t gfp);
void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *bssid,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 2f425075ada8..e4a99989dd06 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -36,13 +36,13 @@ static inline void rdev_set_wakeup(struct cfg80211_registered_device *rdev,
static inline struct wireless_dev
*rdev_add_virtual_intf(struct cfg80211_registered_device *rdev, char *name,
unsigned char name_assign_type,
- enum nl80211_iftype type, u32 *flags,
+ enum nl80211_iftype type,
struct vif_params *params)
{
struct wireless_dev *ret;
trace_rdev_add_virtual_intf(&rdev->wiphy, name, type);
ret = rdev->ops->add_virtual_intf(&rdev->wiphy, name, name_assign_type,
- type, flags, params);
+ type, params);
trace_rdev_return_wdev(&rdev->wiphy, ret);
return ret;
}
@@ -61,12 +61,11 @@ rdev_del_virtual_intf(struct cfg80211_registered_device *rdev,
static inline int
rdev_change_virtual_intf(struct cfg80211_registered_device *rdev,
struct net_device *dev, enum nl80211_iftype type,
- u32 *flags, struct vif_params *params)
+ struct vif_params *params)
{
int ret;
trace_rdev_change_virtual_intf(&rdev->wiphy, dev, type);
- ret = rdev->ops->change_virtual_intf(&rdev->wiphy, dev, type, flags,
- params);
+ ret = rdev->ops->change_virtual_intf(&rdev->wiphy, dev, type, params);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -750,6 +749,18 @@ rdev_set_cqm_rssi_config(struct cfg80211_registered_device *rdev,
}
static inline int
+rdev_set_cqm_rssi_range_config(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, s32 low, s32 high)
+{
+ int ret;
+ trace_rdev_set_cqm_rssi_range_config(&rdev->wiphy, dev, low, high);
+ ret = rdev->ops->set_cqm_rssi_range_config(&rdev->wiphy, dev,
+ low, high);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
+static inline int
rdev_set_cqm_txe_config(struct cfg80211_registered_device *rdev,
struct net_device *dev, u32 rate, u32 pkts, u32 intvl)
{
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 753efcd51fa3..a38f315819cd 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2067,6 +2067,88 @@ reg_process_hint_country_ie(struct wiphy *wiphy,
return REG_REQ_IGNORE;
}
+bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2)
+{
+ const struct ieee80211_regdomain *wiphy1_regd = NULL;
+ const struct ieee80211_regdomain *wiphy2_regd = NULL;
+ const struct ieee80211_regdomain *cfg80211_regd = NULL;
+ bool dfs_domain_same;
+
+ rcu_read_lock();
+
+ cfg80211_regd = rcu_dereference(cfg80211_regdomain);
+ wiphy1_regd = rcu_dereference(wiphy1->regd);
+ if (!wiphy1_regd)
+ wiphy1_regd = cfg80211_regd;
+
+ wiphy2_regd = rcu_dereference(wiphy2->regd);
+ if (!wiphy2_regd)
+ wiphy2_regd = cfg80211_regd;
+
+ dfs_domain_same = wiphy1_regd->dfs_region == wiphy2_regd->dfs_region;
+
+ rcu_read_unlock();
+
+ return dfs_domain_same;
+}
+
+static void reg_copy_dfs_chan_state(struct ieee80211_channel *dst_chan,
+ struct ieee80211_channel *src_chan)
+{
+ if (!(dst_chan->flags & IEEE80211_CHAN_RADAR) ||
+ !(src_chan->flags & IEEE80211_CHAN_RADAR))
+ return;
+
+ if (dst_chan->flags & IEEE80211_CHAN_DISABLED ||
+ src_chan->flags & IEEE80211_CHAN_DISABLED)
+ return;
+
+ if (src_chan->center_freq == dst_chan->center_freq &&
+ dst_chan->dfs_state == NL80211_DFS_USABLE) {
+ dst_chan->dfs_state = src_chan->dfs_state;
+ dst_chan->dfs_state_entered = src_chan->dfs_state_entered;
+ }
+}
+
+static void wiphy_share_dfs_chan_state(struct wiphy *dst_wiphy,
+ struct wiphy *src_wiphy)
+{
+ struct ieee80211_supported_band *src_sband, *dst_sband;
+ struct ieee80211_channel *src_chan, *dst_chan;
+ int i, j, band;
+
+ if (!reg_dfs_domain_same(dst_wiphy, src_wiphy))
+ return;
+
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
+ dst_sband = dst_wiphy->bands[band];
+ src_sband = src_wiphy->bands[band];
+ if (!dst_sband || !src_sband)
+ continue;
+
+ for (i = 0; i < dst_sband->n_channels; i++) {
+ dst_chan = &dst_sband->channels[i];
+ for (j = 0; j < src_sband->n_channels; j++) {
+ src_chan = &src_sband->channels[j];
+ reg_copy_dfs_chan_state(dst_chan, src_chan);
+ }
+ }
+ }
+}
+
+static void wiphy_all_share_dfs_chan_state(struct wiphy *wiphy)
+{
+ struct cfg80211_registered_device *rdev;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ if (wiphy == &rdev->wiphy)
+ continue;
+ wiphy_share_dfs_chan_state(wiphy, &rdev->wiphy);
+ }
+}
+
/* This processes *all* regulatory hints */
static void reg_process_hint(struct regulatory_request *reg_request)
{
@@ -2110,6 +2192,7 @@ static void reg_process_hint(struct regulatory_request *reg_request)
if (treatment == REG_REQ_ALREADY_SET && wiphy &&
wiphy->regulatory_flags & REGULATORY_STRICT_REG) {
wiphy_update_regulatory(wiphy, reg_request->initiator);
+ wiphy_all_share_dfs_chan_state(wiphy);
reg_check_channels();
}
@@ -3061,6 +3144,7 @@ void wiphy_regulatory_register(struct wiphy *wiphy)
lr = get_last_request();
wiphy_update_regulatory(wiphy, lr->initiator);
+ wiphy_all_share_dfs_chan_state(wiphy);
}
void wiphy_regulatory_deregister(struct wiphy *wiphy)
@@ -3120,6 +3204,70 @@ bool regulatory_indoor_allowed(void)
return reg_is_indoor;
}
+bool regulatory_pre_cac_allowed(struct wiphy *wiphy)
+{
+ const struct ieee80211_regdomain *regd = NULL;
+ const struct ieee80211_regdomain *wiphy_regd = NULL;
+ bool pre_cac_allowed = false;
+
+ rcu_read_lock();
+
+ regd = rcu_dereference(cfg80211_regdomain);
+ wiphy_regd = rcu_dereference(wiphy->regd);
+ if (!wiphy_regd) {
+ if (regd->dfs_region == NL80211_DFS_ETSI)
+ pre_cac_allowed = true;
+
+ rcu_read_unlock();
+
+ return pre_cac_allowed;
+ }
+
+ if (regd->dfs_region == wiphy_regd->dfs_region &&
+ wiphy_regd->dfs_region == NL80211_DFS_ETSI)
+ pre_cac_allowed = true;
+
+ rcu_read_unlock();
+
+ return pre_cac_allowed;
+}
+
+void regulatory_propagate_dfs_state(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ enum nl80211_dfs_state dfs_state,
+ enum nl80211_radar_event event)
+{
+ struct cfg80211_registered_device *rdev;
+
+ ASSERT_RTNL();
+
+ if (WARN_ON(!cfg80211_chandef_valid(chandef)))
+ return;
+
+ if (WARN_ON(!(chandef->chan->flags & IEEE80211_CHAN_RADAR)))
+ return;
+
+ list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ if (wiphy == &rdev->wiphy)
+ continue;
+
+ if (!reg_dfs_domain_same(wiphy, &rdev->wiphy))
+ continue;
+
+ if (!ieee80211_get_channel(&rdev->wiphy,
+ chandef->chan->center_freq))
+ continue;
+
+ cfg80211_set_dfs_state(&rdev->wiphy, chandef, dfs_state);
+
+ if (event == NL80211_RADAR_DETECTED ||
+ event == NL80211_RADAR_CAC_FINISHED)
+ cfg80211_sched_dfs_chan_update(rdev);
+
+ nl80211_radar_notify(rdev, chandef, event, NULL, GFP_KERNEL);
+ }
+}
+
int __init regulatory_init(void)
{
int err = 0;
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index f6ced316b5a4..ca7fedf2e7a1 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -143,4 +143,40 @@ int cfg80211_get_unii(int freq);
*/
bool regulatory_indoor_allowed(void);
+/*
+ * Grace period to timeout pre-CAC results on the dfs channels. This timeout
+ * value is used for Non-ETSI domain.
+ * TODO: May be make this timeout available through regdb?
+ */
+#define REG_PRE_CAC_EXPIRY_GRACE_MS 2000
+
+/**
+ * regulatory_pre_cac_allowed - if pre-CAC allowed in the current dfs domain
+ * @wiphy: wiphy for which pre-CAC capability is checked.
+
+ * Pre-CAC is allowed only in ETSI domain.
+ */
+bool regulatory_pre_cac_allowed(struct wiphy *wiphy);
+
+/**
+ * regulatory_propagate_dfs_state - Propagate DFS channel state to other wiphys
+ * @wiphy - wiphy on which radar is detected and the event will be propagated
+ * to other available wiphys having the same DFS domain
+ * @chandef - Channel definition of radar detected channel
+ * @dfs_state - DFS channel state to be set
+ * @event - Type of radar event which triggered this DFS state change
+ *
+ * This function should be called with rtnl lock held.
+ */
+void regulatory_propagate_dfs_state(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ enum nl80211_dfs_state dfs_state,
+ enum nl80211_radar_event event);
+
+/**
+ * reg_dfs_domain_same - Checks if both wiphy have same DFS domain configured
+ * @wiphy1 - wiphy it's dfs_region to be checked against that of wiphy2
+ * @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1
+ */
+bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
#endif /* __NET_WIRELESS_REG_H */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 21be56b3128e..6f4996c0f4df 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -321,8 +321,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk)
spin_unlock_bh(&rdev->bss_lock);
request->scan_start = jiffies;
}
- nl80211_send_sched_scan(rdev, request->dev,
- NL80211_CMD_SCHED_SCAN_RESULTS);
+ nl80211_send_sched_scan(request, NL80211_CMD_SCHED_SCAN_RESULTS);
}
rtnl_unlock();
@@ -379,7 +378,7 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev,
return err;
}
- nl80211_send_sched_scan(rdev, dev, NL80211_CMD_SCHED_SCAN_STOPPED);
+ nl80211_send_sched_scan(sched_scan_req, NL80211_CMD_SCHED_SCAN_STOPPED);
RCU_INIT_POINTER(rdev->sched_scan_req, NULL);
kfree_rcu(sched_scan_req, rcu_head);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index b347e63d7aaa..6459bb7c21f7 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -253,10 +253,13 @@ void cfg80211_conn_work(struct work_struct *work)
}
treason = NL80211_TIMEOUT_UNSPECIFIED;
if (cfg80211_conn_do_work(wdev, &treason)) {
- __cfg80211_connect_result(
- wdev->netdev, bssid,
- NULL, 0, NULL, 0, -1, false, NULL,
- treason);
+ struct cfg80211_connect_resp_params cr;
+
+ memset(&cr, 0, sizeof(cr));
+ cr.status = -1;
+ cr.bssid = bssid;
+ cr.timeout_reason = treason;
+ __cfg80211_connect_result(wdev->netdev, &cr, false);
}
wdev_unlock(wdev);
}
@@ -359,10 +362,13 @@ void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len)
wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
schedule_work(&rdev->conn_work);
} else if (status_code != WLAN_STATUS_SUCCESS) {
- __cfg80211_connect_result(wdev->netdev, mgmt->bssid,
- NULL, 0, NULL, 0,
- status_code, false, NULL,
- NL80211_TIMEOUT_UNSPECIFIED);
+ struct cfg80211_connect_resp_params cr;
+
+ memset(&cr, 0, sizeof(cr));
+ cr.status = status_code;
+ cr.bssid = mgmt->bssid;
+ cr.timeout_reason = NL80211_TIMEOUT_UNSPECIFIED;
+ __cfg80211_connect_result(wdev->netdev, &cr, false);
} else if (wdev->conn->state == CFG80211_CONN_AUTHENTICATING) {
wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT;
schedule_work(&rdev->conn_work);
@@ -669,12 +675,9 @@ static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
*/
/* This method must consume bss one way or another */
-void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
- const u8 *req_ie, size_t req_ie_len,
- const u8 *resp_ie, size_t resp_ie_len,
- int status, bool wextev,
- struct cfg80211_bss *bss,
- enum nl80211_timeout_reason timeout_reason)
+void __cfg80211_connect_result(struct net_device *dev,
+ struct cfg80211_connect_resp_params *cr,
+ bool wextev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
const u8 *country_ie;
@@ -686,48 +689,48 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) {
- cfg80211_put_bss(wdev->wiphy, bss);
+ cfg80211_put_bss(wdev->wiphy, cr->bss);
return;
}
- nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev,
- bssid, req_ie, req_ie_len,
- resp_ie, resp_ie_len,
- status, timeout_reason, GFP_KERNEL);
+ nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, cr,
+ GFP_KERNEL);
#ifdef CONFIG_CFG80211_WEXT
if (wextev) {
- if (req_ie && status == WLAN_STATUS_SUCCESS) {
+ if (cr->req_ie && cr->status == WLAN_STATUS_SUCCESS) {
memset(&wrqu, 0, sizeof(wrqu));
- wrqu.data.length = req_ie_len;
- wireless_send_event(dev, IWEVASSOCREQIE, &wrqu, req_ie);
+ wrqu.data.length = cr->req_ie_len;
+ wireless_send_event(dev, IWEVASSOCREQIE, &wrqu,
+ cr->req_ie);
}
- if (resp_ie && status == WLAN_STATUS_SUCCESS) {
+ if (cr->resp_ie && cr->status == WLAN_STATUS_SUCCESS) {
memset(&wrqu, 0, sizeof(wrqu));
- wrqu.data.length = resp_ie_len;
- wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, resp_ie);
+ wrqu.data.length = cr->resp_ie_len;
+ wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu,
+ cr->resp_ie);
}
memset(&wrqu, 0, sizeof(wrqu));
wrqu.ap_addr.sa_family = ARPHRD_ETHER;
- if (bssid && status == WLAN_STATUS_SUCCESS) {
- memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN);
- memcpy(wdev->wext.prev_bssid, bssid, ETH_ALEN);
+ if (cr->bssid && cr->status == WLAN_STATUS_SUCCESS) {
+ memcpy(wrqu.ap_addr.sa_data, cr->bssid, ETH_ALEN);
+ memcpy(wdev->wext.prev_bssid, cr->bssid, ETH_ALEN);
wdev->wext.prev_bssid_valid = true;
}
wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
}
#endif
- if (!bss && (status == WLAN_STATUS_SUCCESS)) {
+ if (!cr->bss && (cr->status == WLAN_STATUS_SUCCESS)) {
WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect);
- bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid,
- wdev->ssid, wdev->ssid_len,
- wdev->conn_bss_type,
- IEEE80211_PRIVACY_ANY);
- if (bss)
- cfg80211_hold_bss(bss_from_pub(bss));
+ cr->bss = cfg80211_get_bss(wdev->wiphy, NULL, cr->bssid,
+ wdev->ssid, wdev->ssid_len,
+ wdev->conn_bss_type,
+ IEEE80211_PRIVACY_ANY);
+ if (cr->bss)
+ cfg80211_hold_bss(bss_from_pub(cr->bss));
}
if (wdev->current_bss) {
@@ -736,29 +739,29 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
wdev->current_bss = NULL;
}
- if (status != WLAN_STATUS_SUCCESS) {
+ if (cr->status != WLAN_STATUS_SUCCESS) {
kzfree(wdev->connect_keys);
wdev->connect_keys = NULL;
wdev->ssid_len = 0;
wdev->conn_owner_nlportid = 0;
- if (bss) {
- cfg80211_unhold_bss(bss_from_pub(bss));
- cfg80211_put_bss(wdev->wiphy, bss);
+ if (cr->bss) {
+ cfg80211_unhold_bss(bss_from_pub(cr->bss));
+ cfg80211_put_bss(wdev->wiphy, cr->bss);
}
cfg80211_sme_free(wdev);
return;
}
- if (WARN_ON(!bss))
+ if (WARN_ON(!cr->bss))
return;
- wdev->current_bss = bss_from_pub(bss);
+ wdev->current_bss = bss_from_pub(cr->bss);
if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP))
cfg80211_upload_connect_keys(wdev);
rcu_read_lock();
- country_ie = ieee80211_bss_get_ie(bss, WLAN_EID_COUNTRY);
+ country_ie = ieee80211_bss_get_ie(cr->bss, WLAN_EID_COUNTRY);
if (!country_ie) {
rcu_read_unlock();
return;
@@ -775,64 +778,95 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
* - country_ie + 2, the start of the country ie data, and
* - and country_ie[1] which is the IE length
*/
- regulatory_hint_country_ie(wdev->wiphy, bss->channel->band,
+ regulatory_hint_country_ie(wdev->wiphy, cr->bss->channel->band,
country_ie + 2, country_ie[1]);
kfree(country_ie);
}
/* Consumes bss object one way or another */
-void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid,
- struct cfg80211_bss *bss, const u8 *req_ie,
- size_t req_ie_len, const u8 *resp_ie,
- size_t resp_ie_len, int status, gfp_t gfp,
- enum nl80211_timeout_reason timeout_reason)
+void cfg80211_connect_done(struct net_device *dev,
+ struct cfg80211_connect_resp_params *params,
+ gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_event *ev;
unsigned long flags;
+ u8 *next;
- if (bss) {
+ if (params->bss) {
/* Make sure the bss entry provided by the driver is valid. */
- struct cfg80211_internal_bss *ibss = bss_from_pub(bss);
+ struct cfg80211_internal_bss *ibss = bss_from_pub(params->bss);
if (WARN_ON(list_empty(&ibss->list))) {
- cfg80211_put_bss(wdev->wiphy, bss);
+ cfg80211_put_bss(wdev->wiphy, params->bss);
return;
}
}
- ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp);
+ ev = kzalloc(sizeof(*ev) + (params->bssid ? ETH_ALEN : 0) +
+ params->req_ie_len + params->resp_ie_len +
+ params->fils_kek_len + params->pmk_len +
+ (params->pmkid ? WLAN_PMKID_LEN : 0), gfp);
if (!ev) {
- cfg80211_put_bss(wdev->wiphy, bss);
+ cfg80211_put_bss(wdev->wiphy, params->bss);
return;
}
ev->type = EVENT_CONNECT_RESULT;
- if (bssid)
- memcpy(ev->cr.bssid, bssid, ETH_ALEN);
- if (req_ie_len) {
- ev->cr.req_ie = ((u8 *)ev) + sizeof(*ev);
- ev->cr.req_ie_len = req_ie_len;
- memcpy((void *)ev->cr.req_ie, req_ie, req_ie_len);
+ next = ((u8 *)ev) + sizeof(*ev);
+ if (params->bssid) {
+ ev->cr.bssid = next;
+ memcpy((void *)ev->cr.bssid, params->bssid, ETH_ALEN);
+ next += ETH_ALEN;
}
- if (resp_ie_len) {
- ev->cr.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len;
- ev->cr.resp_ie_len = resp_ie_len;
- memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len);
+ if (params->req_ie_len) {
+ ev->cr.req_ie = next;
+ ev->cr.req_ie_len = params->req_ie_len;
+ memcpy((void *)ev->cr.req_ie, params->req_ie,
+ params->req_ie_len);
+ next += params->req_ie_len;
}
- if (bss)
- cfg80211_hold_bss(bss_from_pub(bss));
- ev->cr.bss = bss;
- ev->cr.status = status;
- ev->cr.timeout_reason = timeout_reason;
+ if (params->resp_ie_len) {
+ ev->cr.resp_ie = next;
+ ev->cr.resp_ie_len = params->resp_ie_len;
+ memcpy((void *)ev->cr.resp_ie, params->resp_ie,
+ params->resp_ie_len);
+ next += params->resp_ie_len;
+ }
+ if (params->fils_kek_len) {
+ ev->cr.fils_kek = next;
+ ev->cr.fils_kek_len = params->fils_kek_len;
+ memcpy((void *)ev->cr.fils_kek, params->fils_kek,
+ params->fils_kek_len);
+ next += params->fils_kek_len;
+ }
+ if (params->pmk_len) {
+ ev->cr.pmk = next;
+ ev->cr.pmk_len = params->pmk_len;
+ memcpy((void *)ev->cr.pmk, params->pmk, params->pmk_len);
+ next += params->pmk_len;
+ }
+ if (params->pmkid) {
+ ev->cr.pmkid = next;
+ memcpy((void *)ev->cr.pmkid, params->pmkid, WLAN_PMKID_LEN);
+ next += WLAN_PMKID_LEN;
+ }
+ ev->cr.update_erp_next_seq_num = params->update_erp_next_seq_num;
+ if (params->update_erp_next_seq_num)
+ ev->cr.fils_erp_next_seq_num = params->fils_erp_next_seq_num;
+ if (params->bss)
+ cfg80211_hold_bss(bss_from_pub(params->bss));
+ ev->cr.bss = params->bss;
+ ev->cr.status = params->status;
+ ev->cr.timeout_reason = params->timeout_reason;
spin_lock_irqsave(&wdev->event_lock, flags);
list_add_tail(&ev->list, &wdev->event_list);
spin_unlock_irqrestore(&wdev->event_lock, flags);
queue_work(cfg80211_wq, &rdev->event_work);
}
-EXPORT_SYMBOL(cfg80211_connect_bss);
+EXPORT_SYMBOL(cfg80211_connect_done);
/* Consumes bss object one way or another */
void __cfg80211_roamed(struct wireless_dev *wdev,
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 776e80cef9b4..fd55786f0462 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1322,6 +1322,28 @@ TRACE_EVENT(rdev_set_cqm_rssi_config,
__entry->rssi_thold, __entry->rssi_hyst)
);
+TRACE_EVENT(rdev_set_cqm_rssi_range_config,
+ TP_PROTO(struct wiphy *wiphy,
+ struct net_device *netdev, s32 low, s32 high),
+ TP_ARGS(wiphy, netdev, low, high),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(s32, rssi_low)
+ __field(s32, rssi_high)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->rssi_low = low;
+ __entry->rssi_high = high;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT
+ ", range: %d - %d ",
+ WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->rssi_low, __entry->rssi_high)
+);
+
TRACE_EVENT(rdev_set_cqm_txe_config,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u32 rate,
u32 pkts, u32 intvl),
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 68e5f2ecee1a..a46bc42d0910 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -659,7 +659,7 @@ __ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame,
int offset, int len)
{
struct skb_shared_info *sh = skb_shinfo(skb);
- const skb_frag_t *frag = &sh->frags[-1];
+ const skb_frag_t *frag = &sh->frags[0];
struct page *frag_page;
void *frag_ptr;
int frag_len, frag_size;
@@ -672,10 +672,10 @@ __ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame,
while (offset >= frag_size) {
offset -= frag_size;
- frag++;
frag_page = skb_frag_page(frag);
frag_ptr = skb_frag_address(frag);
frag_size = skb_frag_size(frag);
+ frag++;
}
frag_ptr += offset;
@@ -687,12 +687,12 @@ __ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame,
len -= cur_len;
while (len > 0) {
- frag++;
frag_len = skb_frag_size(frag);
cur_len = min(len, frag_len);
__frame_add_frag(frame, skb_frag_page(frag),
skb_frag_address(frag), cur_len, frag_len);
len -= cur_len;
+ frag++;
}
}
@@ -914,11 +914,11 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
netdev_err(dev, "failed to set key %d\n", i);
continue;
}
- if (wdev->connect_keys->def == i)
- if (rdev_set_default_key(rdev, dev, i, true, true)) {
- netdev_err(dev, "failed to set defkey %d\n", i);
- continue;
- }
+ if (wdev->connect_keys->def == i &&
+ rdev_set_default_key(rdev, dev, i, true, true)) {
+ netdev_err(dev, "failed to set defkey %d\n", i);
+ continue;
+ }
}
kzfree(wdev->connect_keys);
@@ -929,7 +929,6 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
{
struct cfg80211_event *ev;
unsigned long flags;
- const u8 *bssid = NULL;
spin_lock_irqsave(&wdev->event_lock, flags);
while (!list_empty(&wdev->event_list)) {
@@ -941,15 +940,10 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
wdev_lock(wdev);
switch (ev->type) {
case EVENT_CONNECT_RESULT:
- if (!is_zero_ether_addr(ev->cr.bssid))
- bssid = ev->cr.bssid;
__cfg80211_connect_result(
- wdev->netdev, bssid,
- ev->cr.req_ie, ev->cr.req_ie_len,
- ev->cr.resp_ie, ev->cr.resp_ie_len,
- ev->cr.status,
- ev->cr.status == WLAN_STATUS_SUCCESS,
- ev->cr.bss, ev->cr.timeout_reason);
+ wdev->netdev,
+ &ev->cr,
+ ev->cr.status == WLAN_STATUS_SUCCESS);
break;
case EVENT_ROAMED:
__cfg80211_roamed(wdev, ev->rm.bss, ev->rm.req_ie,
@@ -991,7 +985,7 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev)
int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
struct net_device *dev, enum nl80211_iftype ntype,
- u32 *flags, struct vif_params *params)
+ struct vif_params *params)
{
int err;
enum nl80211_iftype otype = dev->ieee80211_ptr->iftype;
@@ -1049,7 +1043,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
cfg80211_process_rdev_events(rdev);
}
- err = rdev_change_virtual_intf(rdev, dev, ntype, flags, params);
+ err = rdev_change_virtual_intf(rdev, dev, ntype, params);
WARN_ON(!err && dev->ieee80211_ptr->iftype != ntype);
@@ -1097,6 +1091,35 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
return err;
}
+static u32 cfg80211_calculate_bitrate_ht(struct rate_info *rate)
+{
+ int modulation, streams, bitrate;
+
+ /* the formula below does only work for MCS values smaller than 32 */
+ if (WARN_ON_ONCE(rate->mcs >= 32))
+ return 0;
+
+ modulation = rate->mcs & 7;
+ streams = (rate->mcs >> 3) + 1;
+
+ bitrate = (rate->bw == RATE_INFO_BW_40) ? 13500000 : 6500000;
+
+ if (modulation < 4)
+ bitrate *= (modulation + 1);
+ else if (modulation == 4)
+ bitrate *= (modulation + 2);
+ else
+ bitrate *= (modulation + 3);
+
+ bitrate *= streams;
+
+ if (rate->flags & RATE_INFO_FLAGS_SHORT_GI)
+ bitrate = (bitrate / 9) * 10;
+
+ /* do NOT round down here */
+ return (bitrate + 50000) / 100000;
+}
+
static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate)
{
static const u32 __mcs2bitrate[] = {
@@ -1230,39 +1253,14 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
u32 cfg80211_calculate_bitrate(struct rate_info *rate)
{
- int modulation, streams, bitrate;
-
- if (!(rate->flags & RATE_INFO_FLAGS_MCS) &&
- !(rate->flags & RATE_INFO_FLAGS_VHT_MCS))
- return rate->legacy;
+ if (rate->flags & RATE_INFO_FLAGS_MCS)
+ return cfg80211_calculate_bitrate_ht(rate);
if (rate->flags & RATE_INFO_FLAGS_60G)
return cfg80211_calculate_bitrate_60g(rate);
if (rate->flags & RATE_INFO_FLAGS_VHT_MCS)
return cfg80211_calculate_bitrate_vht(rate);
- /* the formula below does only work for MCS values smaller than 32 */
- if (WARN_ON_ONCE(rate->mcs >= 32))
- return 0;
-
- modulation = rate->mcs & 7;
- streams = (rate->mcs >> 3) + 1;
-
- bitrate = (rate->bw == RATE_INFO_BW_40) ? 13500000 : 6500000;
-
- if (modulation < 4)
- bitrate *= (modulation + 1);
- else if (modulation == 4)
- bitrate *= (modulation + 2);
- else
- bitrate *= (modulation + 3);
-
- bitrate *= streams;
-
- if (rate->flags & RATE_INFO_FLAGS_SHORT_GI)
- bitrate = (bitrate / 9) * 10;
-
- /* do NOT round down here */
- return (bitrate + 50000) / 100000;
+ return rate->legacy;
}
EXPORT_SYMBOL(cfg80211_calculate_bitrate);
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index a220156cf217..5d4a02c7979b 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -62,7 +62,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
memset(&vifparams, 0, sizeof(vifparams));
- return cfg80211_change_iface(rdev, dev, type, NULL, &vifparams);
+ return cfg80211_change_iface(rdev, dev, type, &vifparams);
}
EXPORT_WEXT_HANDLER(cfg80211_wext_siwmode);
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index c0e961983f17..abf81b329dc1 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -5,6 +5,7 @@
obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
xfrm_input.o xfrm_output.o \
xfrm_sysctl.o xfrm_replay.o
+obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o
obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
new file mode 100644
index 000000000000..8ec8a3fcf8d4
--- /dev/null
+++ b/net/xfrm/xfrm_device.c
@@ -0,0 +1,208 @@
+/*
+ * xfrm_device.c - IPsec device offloading code.
+ *
+ * Copyright (c) 2015 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <net/dst.h>
+#include <net/xfrm.h>
+#include <linux/notifier.h>
+
+int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
+{
+ int err;
+ struct xfrm_state *x;
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (skb_is_gso(skb))
+ return 0;
+
+ if (xo) {
+ x = skb->sp->xvec[skb->sp->len - 1];
+ if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ return 0;
+
+ x->outer_mode->xmit(x, skb);
+
+ err = x->type_offload->xmit(x, skb, features);
+ if (err) {
+ XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+ return err;
+ }
+
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(validate_xmit_xfrm);
+
+int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
+ struct xfrm_user_offload *xuo)
+{
+ int err;
+ struct dst_entry *dst;
+ struct net_device *dev;
+ struct xfrm_state_offload *xso = &x->xso;
+ xfrm_address_t *saddr;
+ xfrm_address_t *daddr;
+
+ if (!x->type_offload)
+ return 0;
+
+ /* We don't yet support UDP encapsulation, TFC padding and ESN. */
+ if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN))
+ return 0;
+
+ dev = dev_get_by_index(net, xuo->ifindex);
+ if (!dev) {
+ if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
+ saddr = &x->props.saddr;
+ daddr = &x->id.daddr;
+ } else {
+ saddr = &x->id.daddr;
+ daddr = &x->props.saddr;
+ }
+
+ dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, x->props.family);
+ if (IS_ERR(dst))
+ return 0;
+
+ dev = dst->dev;
+
+ dev_hold(dev);
+ dst_release(dst);
+ }
+
+ if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) {
+ dev_put(dev);
+ return 0;
+ }
+
+ xso->dev = dev;
+ xso->num_exthdrs = 1;
+ xso->flags = xuo->flags;
+
+ err = dev->xfrmdev_ops->xdo_dev_state_add(x);
+ if (err) {
+ dev_put(dev);
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xfrm_dev_state_add);
+
+bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+ int mtu;
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct net_device *dev = x->xso.dev;
+
+ if (!x->type_offload || x->encap)
+ return false;
+
+ if ((x->xso.offload_handle && (dev == dst->path->dev)) &&
+ !dst->child->xfrm && x->type->get_mtu) {
+ mtu = x->type->get_mtu(x, xdst->child_mtu_cached);
+
+ if (skb->len <= mtu)
+ goto ok;
+
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ goto ok;
+ }
+
+ return false;
+
+ok:
+ if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_offload_ok)
+ return x->xso.dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x);
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
+
+int xfrm_dev_register(struct net_device *dev)
+{
+ if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
+ return NOTIFY_BAD;
+ if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
+ !(dev->features & NETIF_F_HW_ESP))
+ return NOTIFY_BAD;
+
+ return NOTIFY_DONE;
+}
+
+static int xfrm_dev_unregister(struct net_device *dev)
+{
+ return NOTIFY_DONE;
+}
+
+static int xfrm_dev_feat_change(struct net_device *dev)
+{
+ if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
+ return NOTIFY_BAD;
+ else if (!(dev->features & NETIF_F_HW_ESP))
+ dev->xfrmdev_ops = NULL;
+
+ if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
+ !(dev->features & NETIF_F_HW_ESP))
+ return NOTIFY_BAD;
+
+ return NOTIFY_DONE;
+}
+
+static int xfrm_dev_down(struct net_device *dev)
+{
+ if (dev->hw_features & NETIF_F_HW_ESP)
+ xfrm_dev_state_flush(dev_net(dev), dev, true);
+
+ xfrm_garbage_collect(dev_net(dev));
+
+ return NOTIFY_DONE;
+}
+
+static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ return xfrm_dev_register(dev);
+
+ case NETDEV_UNREGISTER:
+ return xfrm_dev_unregister(dev);
+
+ case NETDEV_FEAT_CHANGE:
+ return xfrm_dev_feat_change(dev);
+
+ case NETDEV_DOWN:
+ return xfrm_dev_down(dev);
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block xfrm_dev_notifier = {
+ .notifier_call = xfrm_dev_event,
+};
+
+void __net_init xfrm_dev_init(void)
+{
+ register_netdevice_notifier(&xfrm_dev_notifier);
+}
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index 666c5ffe929d..eaea9c4fb3b0 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -54,8 +54,8 @@ static inline unsigned int __xfrm4_dpref_spref_hash(const xfrm_address_t *daddr,
static inline unsigned int __xfrm6_pref_hash(const xfrm_address_t *addr,
__u8 prefixlen)
{
- int pdw;
- int pbi;
+ unsigned int pdw;
+ unsigned int pbi;
u32 initval = 0;
pdw = prefixlen >> 5; /* num of whole u32 in prefix */
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 46bdb4fbed0b..21c6cc965402 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -107,6 +107,8 @@ struct sec_path *secpath_dup(struct sec_path *src)
sp->len = 0;
sp->olen = 0;
+ memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH]));
+
if (src) {
int i;
@@ -207,8 +209,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
unsigned int family;
int decaps = 0;
int async = 0;
- struct xfrm_offload *xo;
bool xfrm_gro = false;
+ bool crypto_done = false;
+ struct xfrm_offload *xo = xfrm_offload(skb);
if (encap_type < 0) {
x = xfrm_input_state(skb);
@@ -220,9 +223,40 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
seq = XFRM_SKB_CB(skb)->seq.input.low;
goto resume;
}
+
/* encap_type < -1 indicates a GRO call. */
encap_type = 0;
seq = XFRM_SPI_SKB_CB(skb)->seq;
+
+ if (xo && (xo->flags & CRYPTO_DONE)) {
+ crypto_done = true;
+ x = xfrm_input_state(skb);
+ family = XFRM_SPI_SKB_CB(skb)->family;
+
+ if (!(xo->status & CRYPTO_SUCCESS)) {
+ if (xo->status &
+ (CRYPTO_TRANSPORT_AH_AUTH_FAILED |
+ CRYPTO_TRANSPORT_ESP_AUTH_FAILED |
+ CRYPTO_TUNNEL_AH_AUTH_FAILED |
+ CRYPTO_TUNNEL_ESP_AUTH_FAILED)) {
+
+ xfrm_audit_state_icvfail(x, skb,
+ x->type->proto);
+ x->stats.integrity_failed++;
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
+ goto drop;
+ }
+
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+ goto drop;
+ }
+
+ if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
+ goto drop;
+ }
+ }
+
goto lock;
}
@@ -311,7 +345,10 @@ lock:
skb_dst_force(skb);
dev_hold(skb->dev);
- nexthdr = x->type->input(x, skb);
+ if (crypto_done)
+ nexthdr = x->type_offload->input_tail(x, skb);
+ else
+ nexthdr = x->type->input(x, skb);
if (nexthdr == -EINPROGRESS)
return 0;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 8ba29fe58352..8c0b6722aaa8 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -99,12 +99,13 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
skb_dst_force(skb);
- /* Inner headers are invalid now. */
- skb->encapsulation = 0;
-
- err = x->type->output(x, skb);
- if (err == -EINPROGRESS)
- goto out;
+ if (xfrm_offload(skb)) {
+ x->type_offload->encap(x, skb);
+ } else {
+ err = x->type->output(x, skb);
+ if (err == -EINPROGRESS)
+ goto out;
+ }
resume:
if (err) {
@@ -200,8 +201,40 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
+ struct xfrm_state *x = skb_dst(skb)->xfrm;
int err;
+ secpath_reset(skb);
+ skb->encapsulation = 0;
+
+ if (xfrm_dev_offload_ok(skb, x)) {
+ struct sec_path *sp;
+
+ sp = secpath_dup(skb->sp);
+ if (!sp) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ if (skb->sp)
+ secpath_put(skb->sp);
+ skb->sp = sp;
+ skb->encapsulation = 1;
+
+ sp->olen++;
+ sp->xvec[skb->sp->len++] = x;
+ xfrm_state_hold(x);
+
+ if (skb_is_gso(skb)) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
+
+ return xfrm_output2(net, sk, skb);
+ }
+
+ if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)
+ goto out;
+ }
+
if (skb_is_gso(skb))
return xfrm_output_gso(net, sk, skb);
@@ -214,6 +247,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
}
}
+out:
return xfrm_output2(net, sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 236cbbc0ab9c..dd44ddc1aea5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -116,11 +116,10 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa
return afinfo;
}
-static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
- int tos, int oif,
- const xfrm_address_t *saddr,
- const xfrm_address_t *daddr,
- int family)
+struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
+ const xfrm_address_t *saddr,
+ const xfrm_address_t *daddr,
+ int family)
{
const struct xfrm_policy_afinfo *afinfo;
struct dst_entry *dst;
@@ -135,6 +134,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
return dst;
}
+EXPORT_SYMBOL(__xfrm_dst_lookup);
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
int tos, int oif,
@@ -2929,21 +2929,6 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo)
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
-static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
- switch (event) {
- case NETDEV_DOWN:
- xfrm_garbage_collect(dev_net(dev));
- }
- return NOTIFY_DONE;
-}
-
-static struct notifier_block xfrm_dev_notifier = {
- .notifier_call = xfrm_dev_event,
-};
-
#ifdef CONFIG_XFRM_STATISTICS
static int __net_init xfrm_statistics_init(struct net *net)
{
@@ -3020,7 +3005,7 @@ static int __net_init xfrm_policy_init(struct net *net)
INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
if (net_eq(net, &init_net))
- register_netdevice_notifier(&xfrm_dev_notifier);
+ xfrm_dev_init();
return 0;
out_bydst:
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index cdc2e2e71bff..8b23c5bcf8e8 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -45,7 +45,8 @@ u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq)
return seq_hi;
}
-
+EXPORT_SYMBOL(xfrm_replay_seqhi);
+;
static void xfrm_replay_notify(struct xfrm_state *x, int event)
{
struct km_event c;
@@ -558,6 +559,158 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
x->repl->notify(x, XFRM_REPLAY_UPDATE);
}
+#ifdef CONFIG_XFRM_OFFLOAD
+static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err = 0;
+ struct net *net = xs_net(x);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ __u32 oseq = x->replay.oseq;
+
+ if (!xo)
+ return xfrm_replay_overflow(x, skb);
+
+ if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
+ if (!skb_is_gso(skb)) {
+ XFRM_SKB_CB(skb)->seq.output.low = ++oseq;
+ xo->seq.low = oseq;
+ } else {
+ XFRM_SKB_CB(skb)->seq.output.low = oseq + 1;
+ xo->seq.low = oseq + 1;
+ oseq += skb_shinfo(skb)->gso_segs;
+ }
+
+ XFRM_SKB_CB(skb)->seq.output.hi = 0;
+ xo->seq.hi = 0;
+ if (unlikely(oseq < x->replay.oseq)) {
+ xfrm_audit_state_replay_overflow(x, skb);
+ err = -EOVERFLOW;
+
+ return err;
+ }
+
+ x->replay.oseq = oseq;
+
+ if (xfrm_aevent_is_on(net))
+ x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ }
+
+ return err;
+}
+
+static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err = 0;
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+ struct net *net = xs_net(x);
+ __u32 oseq = replay_esn->oseq;
+
+ if (!xo)
+ return xfrm_replay_overflow_bmp(x, skb);
+
+ if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
+ if (!skb_is_gso(skb)) {
+ XFRM_SKB_CB(skb)->seq.output.low = ++oseq;
+ xo->seq.low = oseq;
+ } else {
+ XFRM_SKB_CB(skb)->seq.output.low = oseq + 1;
+ xo->seq.low = oseq + 1;
+ oseq += skb_shinfo(skb)->gso_segs;
+ }
+
+ XFRM_SKB_CB(skb)->seq.output.hi = 0;
+ xo->seq.hi = 0;
+ if (unlikely(oseq < replay_esn->oseq)) {
+ xfrm_audit_state_replay_overflow(x, skb);
+ err = -EOVERFLOW;
+
+ return err;
+ } else {
+ replay_esn->oseq = oseq;
+ }
+
+ if (xfrm_aevent_is_on(net))
+ x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ }
+
+ return err;
+}
+
+static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err = 0;
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+ struct net *net = xs_net(x);
+ __u32 oseq = replay_esn->oseq;
+ __u32 oseq_hi = replay_esn->oseq_hi;
+
+ if (!xo)
+ return xfrm_replay_overflow_esn(x, skb);
+
+ if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
+ if (!skb_is_gso(skb)) {
+ XFRM_SKB_CB(skb)->seq.output.low = ++oseq;
+ XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi;
+ xo->seq.low = oseq;
+ xo->seq.hi = oseq_hi;
+ } else {
+ XFRM_SKB_CB(skb)->seq.output.low = oseq + 1;
+ XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi;
+ xo->seq.low = oseq = oseq + 1;
+ xo->seq.hi = oseq_hi;
+ oseq += skb_shinfo(skb)->gso_segs;
+ }
+
+ if (unlikely(oseq < replay_esn->oseq)) {
+ XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi;
+ xo->seq.hi = oseq_hi;
+
+ if (replay_esn->oseq_hi == 0) {
+ replay_esn->oseq--;
+ replay_esn->oseq_hi--;
+ xfrm_audit_state_replay_overflow(x, skb);
+ err = -EOVERFLOW;
+
+ return err;
+ }
+ }
+
+ replay_esn->oseq = oseq;
+ replay_esn->oseq_hi = oseq_hi;
+
+ if (xfrm_aevent_is_on(net))
+ x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ }
+
+ return err;
+}
+
+static const struct xfrm_replay xfrm_replay_legacy = {
+ .advance = xfrm_replay_advance,
+ .check = xfrm_replay_check,
+ .recheck = xfrm_replay_check,
+ .notify = xfrm_replay_notify,
+ .overflow = xfrm_replay_overflow_offload,
+};
+
+static const struct xfrm_replay xfrm_replay_bmp = {
+ .advance = xfrm_replay_advance_bmp,
+ .check = xfrm_replay_check_bmp,
+ .recheck = xfrm_replay_check_bmp,
+ .notify = xfrm_replay_notify_bmp,
+ .overflow = xfrm_replay_overflow_offload_bmp,
+};
+
+static const struct xfrm_replay xfrm_replay_esn = {
+ .advance = xfrm_replay_advance_esn,
+ .check = xfrm_replay_check_esn,
+ .recheck = xfrm_replay_recheck_esn,
+ .notify = xfrm_replay_notify_esn,
+ .overflow = xfrm_replay_overflow_offload_esn,
+};
+#else
static const struct xfrm_replay xfrm_replay_legacy = {
.advance = xfrm_replay_advance,
.check = xfrm_replay_check,
@@ -581,6 +734,7 @@ static const struct xfrm_replay xfrm_replay_esn = {
.notify = xfrm_replay_notify_esn,
.overflow = xfrm_replay_overflow_esn,
};
+#endif
int xfrm_init_replay(struct xfrm_state *x)
{
@@ -595,10 +749,12 @@ int xfrm_init_replay(struct xfrm_state *x)
if (replay_esn->replay_window == 0)
return -EINVAL;
x->repl = &xfrm_replay_esn;
- } else
+ } else {
x->repl = &xfrm_replay_bmp;
- } else
+ }
+ } else {
x->repl = &xfrm_replay_legacy;
+ }
return 0;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5a597dbbe564..fc3c5aa38754 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -251,6 +251,75 @@ static void xfrm_put_type(const struct xfrm_type *type)
module_put(type->owner);
}
+static DEFINE_SPINLOCK(xfrm_type_offload_lock);
+int xfrm_register_type_offload(const struct xfrm_type_offload *type,
+ unsigned short family)
+{
+ struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+ const struct xfrm_type_offload **typemap;
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EAFNOSUPPORT;
+ typemap = afinfo->type_offload_map;
+ spin_lock_bh(&xfrm_type_offload_lock);
+
+ if (likely(typemap[type->proto] == NULL))
+ typemap[type->proto] = type;
+ else
+ err = -EEXIST;
+ spin_unlock_bh(&xfrm_type_offload_lock);
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(xfrm_register_type_offload);
+
+int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
+ unsigned short family)
+{
+ struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+ const struct xfrm_type_offload **typemap;
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EAFNOSUPPORT;
+ typemap = afinfo->type_offload_map;
+ spin_lock_bh(&xfrm_type_offload_lock);
+
+ if (unlikely(typemap[type->proto] != type))
+ err = -ENOENT;
+ else
+ typemap[type->proto] = NULL;
+ spin_unlock_bh(&xfrm_type_offload_lock);
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(xfrm_unregister_type_offload);
+
+static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned short family)
+{
+ struct xfrm_state_afinfo *afinfo;
+ const struct xfrm_type_offload **typemap;
+ const struct xfrm_type_offload *type;
+
+ afinfo = xfrm_state_get_afinfo(family);
+ if (unlikely(afinfo == NULL))
+ return NULL;
+ typemap = afinfo->type_offload_map;
+
+ type = typemap[proto];
+ if ((type && !try_module_get(type->owner)))
+ type = NULL;
+
+ rcu_read_unlock();
+ return type;
+}
+
+static void xfrm_put_type_offload(const struct xfrm_type_offload *type)
+{
+ module_put(type->owner);
+}
+
static DEFINE_SPINLOCK(xfrm_mode_lock);
int xfrm_register_mode(struct xfrm_mode *mode, int family)
{
@@ -365,10 +434,13 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
xfrm_put_mode(x->inner_mode_iaf);
if (x->outer_mode)
xfrm_put_mode(x->outer_mode);
+ if (x->type_offload)
+ xfrm_put_type_offload(x->type_offload);
if (x->type) {
x->type->destructor(x);
xfrm_put_type(x->type);
}
+ xfrm_dev_state_free(x);
security_xfrm_state_free(x);
kfree(x);
}
@@ -538,6 +610,8 @@ int __xfrm_state_delete(struct xfrm_state *x)
net->xfrm.state_num--;
spin_unlock(&net->xfrm.xfrm_state_lock);
+ xfrm_dev_state_delete(x);
+
/* All xfrm_state objects are created by xfrm_state_alloc.
* The xfrm_state_alloc call gives a reference, and that
* is what we are dropping here.
@@ -582,12 +656,41 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
return err;
}
+
+static inline int
+xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
+{
+ int i, err = 0;
+
+ for (i = 0; i <= net->xfrm.state_hmask; i++) {
+ struct xfrm_state *x;
+ struct xfrm_state_offload *xso;
+
+ hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
+ xso = &x->xso;
+
+ if (xso->dev == dev &&
+ (err = security_xfrm_state_delete(x)) != 0) {
+ xfrm_audit_state_delete(x, 0, task_valid);
+ return err;
+ }
+ }
+ }
+
+ return err;
+}
#else
static inline int
xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
return 0;
}
+
+static inline int
+xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
+{
+ return 0;
+}
#endif
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
@@ -630,6 +733,48 @@ out:
}
EXPORT_SYMBOL(xfrm_state_flush);
+int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
+{
+ int i, err = 0, cnt = 0;
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
+ if (err)
+ goto out;
+
+ err = -ESRCH;
+ for (i = 0; i <= net->xfrm.state_hmask; i++) {
+ struct xfrm_state *x;
+ struct xfrm_state_offload *xso;
+restart:
+ hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
+ xso = &x->xso;
+
+ if (!xfrm_state_kern(x) && xso->dev == dev) {
+ xfrm_state_hold(x);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
+ err = xfrm_state_delete(x);
+ xfrm_audit_state_delete(x, err ? 0 : 1,
+ task_valid);
+ xfrm_state_put(x);
+ if (!err)
+ cnt++;
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ goto restart;
+ }
+ }
+ }
+ if (cnt)
+ err = 0;
+
+out:
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_dev_state_flush);
+
void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
{
spin_lock_bh(&net->xfrm.xfrm_state_lock);
@@ -2077,6 +2222,8 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
if (x->type == NULL)
goto error;
+ x->type_offload = xfrm_get_type_offload(x->id.proto, family);
+
err = x->type->init_state(x);
if (err)
goto error;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 40a8aa39220d..ba74e5eeeeef 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -595,6 +595,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
goto error;
}
+ if (attrs[XFRMA_OFFLOAD_DEV] &&
+ xfrm_dev_state_add(net, x, nla_data(attrs[XFRMA_OFFLOAD_DEV])))
+ goto error;
+
if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
attrs[XFRMA_REPLAY_ESN_VAL])))
goto error;
@@ -779,6 +783,23 @@ static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb)
return 0;
}
+static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb)
+{
+ struct xfrm_user_offload *xuo;
+ struct nlattr *attr;
+
+ attr = nla_reserve(skb, XFRMA_OFFLOAD_DEV, sizeof(*xuo));
+ if (attr == NULL)
+ return -EMSGSIZE;
+
+ xuo = nla_data(attr);
+
+ xuo->ifindex = xso->dev->ifindex;
+ xuo->flags = xso->flags;
+
+ return 0;
+}
+
static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
{
struct xfrm_algo *algo;
@@ -869,6 +890,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
&x->replay);
if (ret)
goto out;
+ if(x->xso.dev)
+ ret = copy_user_offload(&x->xso, skb);
+ if (ret)
+ goto out;
if (x->security)
ret = copy_sec_ctx(x->security, skb);
out:
@@ -932,8 +957,8 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
u8 proto = 0;
int err;
- err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX,
- xfrma_policy);
+ err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, xfrma_policy,
+ NULL);
if (err < 0)
return err;
@@ -2406,6 +2431,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 },
[XFRMA_PROTO] = { .type = NLA_U8 },
[XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
+ [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) },
};
static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
@@ -2448,7 +2474,8 @@ static const struct xfrm_link {
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo },
};
-static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct nlattr *attrs[XFRMA_MAX+1];
@@ -2488,7 +2515,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs,
link->nla_max ? : XFRMA_MAX,
- link->nla_pol ? : xfrma_policy);
+ link->nla_pol ? : xfrma_policy, extack);
if (err < 0)
return err;
@@ -2622,6 +2649,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
l += nla_total_size(sizeof(*x->coaddr));
if (x->props.extra_flags)
l += nla_total_size(sizeof(x->props.extra_flags));
+ if (x->xso.dev)
+ l += nla_total_size(sizeof(x->xso));
/* Must count x->lastused as it may become non-zero behind our back. */
l += nla_total_size_64bit(sizeof(u64));
@@ -3108,7 +3137,6 @@ static bool xfrm_is_alive(const struct km_event *c)
}
static struct xfrm_mgr netlink_mgr = {
- .id = "netlink",
.notify = xfrm_send_state_notify,
.acquire = xfrm_send_acquire,
.compile_policy = xfrm_compile_policy,