summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c7
-rw-r--r--net/8021q/vlanproc.c2
-rw-r--r--net/9p/trans_fd.c61
-rw-r--r--net/9p/trans_xen.c4
-rw-r--r--net/Kconfig19
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/aarp.c1
-rw-r--r--net/appletalk/atalk_proc.c3
-rw-r--r--net/atm/br2684.c1
-rw-r--r--net/atm/common.c6
-rw-r--r--net/atm/common.h2
-rw-r--r--net/atm/lec.c1
-rw-r--r--net/atm/mpc.c9
-rw-r--r--net/atm/mpoa_caches.c48
-rw-r--r--net/atm/mpoa_caches.h9
-rw-r--r--net/atm/mpoa_proc.c16
-rw-r--r--net/atm/proc.c1
-rw-r--r--net/ax25/af_ax25.c1
-rw-r--r--net/ax25/ax25_route.c1
-rw-r--r--net/ax25/ax25_uid.c1
-rw-r--r--net/batman-adv/Kconfig17
-rw-r--r--net/batman-adv/Makefile2
-rw-r--r--net/batman-adv/bat_algo.c35
-rw-r--r--net/batman-adv/bat_algo.h1
-rw-r--r--net/batman-adv/bat_iv_ogm.c111
-rw-r--r--net/batman-adv/bat_iv_ogm.h1
-rw-r--r--net/batman-adv/bat_v.c53
-rw-r--r--net/batman-adv/bat_v.h1
-rw-r--r--net/batman-adv/bat_v_elp.c29
-rw-r--r--net/batman-adv/bat_v_elp.h1
-rw-r--r--net/batman-adv/bat_v_ogm.c39
-rw-r--r--net/batman-adv/bat_v_ogm.h1
-rw-r--r--net/batman-adv/bitarray.c3
-rw-r--r--net/batman-adv/bitarray.h10
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c114
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h5
-rw-r--r--net/batman-adv/debugfs.c30
-rw-r--r--net/batman-adv/debugfs.h1
-rw-r--r--net/batman-adv/distributed-arp-table.c80
-rw-r--r--net/batman-adv/distributed-arp-table.h9
-rw-r--r--net/batman-adv/fragmentation.c27
-rw-r--r--net/batman-adv/fragmentation.h3
-rw-r--r--net/batman-adv/gateway_client.c67
-rw-r--r--net/batman-adv/gateway_client.h1
-rw-r--r--net/batman-adv/gateway_common.c30
-rw-r--r--net/batman-adv/gateway_common.h6
-rw-r--r--net/batman-adv/hard-interface.c67
-rw-r--r--net/batman-adv/hard-interface.h59
-rw-r--r--net/batman-adv/hash.c20
-rw-r--r--net/batman-adv/hash.h28
-rw-r--r--net/batman-adv/icmp_socket.c19
-rw-r--r--net/batman-adv/icmp_socket.h1
-rw-r--r--net/batman-adv/log.c21
-rw-r--r--net/batman-adv/log.h62
-rw-r--r--net/batman-adv/main.c54
-rw-r--r--net/batman-adv/main.h127
-rw-r--r--net/batman-adv/multicast.c83
-rw-r--r--net/batman-adv/multicast.h17
-rw-r--r--net/batman-adv/netlink.c27
-rw-r--r--net/batman-adv/netlink.h1
-rw-r--r--net/batman-adv/network-coding.c126
-rw-r--r--net/batman-adv/network-coding.h1
-rw-r--r--net/batman-adv/originator.c154
-rw-r--r--net/batman-adv/originator.h47
-rw-r--r--net/batman-adv/packet.h621
-rw-r--r--net/batman-adv/routing.c56
-rw-r--r--net/batman-adv/routing.h1
-rw-r--r--net/batman-adv/send.c66
-rw-r--r--net/batman-adv/send.h8
-rw-r--r--net/batman-adv/soft-interface.c66
-rw-r--r--net/batman-adv/soft-interface.h1
-rw-r--r--net/batman-adv/sysfs.c58
-rw-r--r--net/batman-adv/sysfs.h14
-rw-r--r--net/batman-adv/tp_meter.c81
-rw-r--r--net/batman-adv/tp_meter.h1
-rw-r--r--net/batman-adv/translation-table.c234
-rw-r--r--net/batman-adv/translation-table.h1
-rw-r--r--net/batman-adv/tvlv.c43
-rw-r--r--net/batman-adv/tvlv.h1
-rw-r--r--net/batman-adv/types.h1996
-rw-r--r--net/bluetooth/af_bluetooth.c44
-rw-r--r--net/bluetooth/cmtp/capi.c1
-rw-r--r--net/bluetooth/hci_debugfs.c201
-rw-r--r--net/bluetooth/hci_request.c64
-rw-r--r--net/bluetooth/hidp/core.c2
-rw-r--r--net/bluetooth/l2cap_core.c20
-rw-r--r--net/bridge/br_device.c10
-rw-r--r--net/bridge/br_fdb.c394
-rw-r--r--net/bridge/br_mdb.c6
-rw-r--r--net/bridge/br_netfilter_hooks.c2
-rw-r--r--net/bridge/br_netlink.c11
-rw-r--r--net/bridge/br_nf_core.c1
-rw-r--r--net/bridge/br_private.h18
-rw-r--r--net/bridge/br_switchdev.c8
-rw-r--r--net/bridge/br_sysfs_br.c13
-rw-r--r--net/bridge/netfilter/Kconfig2
-rw-r--r--net/bridge/netfilter/ebtables.c2
-rw-r--r--net/bridge/netfilter/nf_tables_bridge.c120
-rw-r--r--net/caif/caif_dev.c5
-rw-r--r--net/caif/caif_socket.c4
-rw-r--r--net/caif/caif_usb.c4
-rw-r--r--net/caif/cfcnfg.c10
-rw-r--r--net/caif/cfctrl.c54
-rw-r--r--net/caif/cfpkt_skbuff.c1
-rw-r--r--net/caif/chnl_net.c1
-rw-r--r--net/can/Kconfig2
-rw-r--r--net/can/af_can.c56
-rw-r--r--net/can/af_can.h2
-rw-r--r--net/can/bcm.c1
-rw-r--r--net/can/gw.c14
-rw-r--r--net/can/proc.c14
-rw-r--r--net/can/raw.c2
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/datagram.c8
-rw-r--r--net/core/dev.c297
-rw-r--r--net/core/dev_ioctl.c132
-rw-r--r--net/core/devlink.c596
-rw-r--r--net/core/dst.c14
-rw-r--r--net/core/ethtool.c29
-rw-r--r--net/core/filter.c367
-rw-r--r--net/core/flow_dissector.c72
-rw-r--r--net/core/gen_estimator.c4
-rw-r--r--net/core/gen_stats.c9
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/neighbour.c5
-rw-r--r--net/core/net-procfs.c4
-rw-r--r--net/core/net-sysfs.c56
-rw-r--r--net/core/net_namespace.c85
-rw-r--r--net/core/netprio_cgroup.c1
-rw-r--r--net/core/pktgen.c281
-rw-r--r--net/core/rtnetlink.c495
-rw-r--r--net/core/skbuff.c88
-rw-r--r--net/core/sock.c63
-rw-r--r--net/core/sock_diag.c2
-rw-r--r--net/core/sock_reuseport.c39
-rw-r--r--net/core/sysctl_net_core.c58
-rw-r--r--net/core/xdp.c73
-rw-r--r--net/dccp/Kconfig17
-rw-r--r--net/dccp/Makefile5
-rw-r--r--net/dccp/ackvec.c2
-rw-r--r--net/dccp/ccids/ccid2.c3
-rw-r--r--net/dccp/dccp.h2
-rw-r--r--net/dccp/minisocks.c13
-rw-r--r--net/dccp/probe.c203
-rw-r--r--net/dccp/proto.c16
-rw-r--r--net/dccp/trace.h84
-rw-r--r--net/decnet/af_decnet.c5
-rw-r--r--net/decnet/dn_dev.c10
-rw-r--r--net/decnet/dn_fib.c6
-rw-r--r--net/decnet/dn_neigh.c1
-rw-r--r--net/decnet/dn_route.c43
-rw-r--r--net/dsa/Kconfig9
-rw-r--r--net/dsa/Makefile3
-rw-r--r--net/dsa/dsa2.c34
-rw-r--r--net/dsa/dsa_priv.h13
-rw-r--r--net/dsa/legacy.c24
-rw-r--r--net/dsa/port.c103
-rw-r--r--net/dsa/slave.c26
-rw-r--r--net/dsa/switch.c111
-rw-r--r--net/dsa/tag_brcm.c12
-rw-r--r--net/dsa/tag_mtk.c38
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c45
-rw-r--r--net/ipv4/arp.c8
-rw-r--r--net/ipv4/devinet.c63
-rw-r--r--net/ipv4/esp4.c37
-rw-r--r--net/ipv4/esp4_offload.c77
-rw-r--r--net/ipv4/fib_frontend.c17
-rw-r--r--net/ipv4/fib_semantics.c8
-rw-r--r--net/ipv4/fib_trie.c3
-rw-r--r--net/ipv4/igmp.c50
-rw-r--r--net/ipv4/inet_connection_sock.c9
-rw-r--r--net/ipv4/inet_diag.c8
-rw-r--r--net/ipv4/inet_hashtables.c188
-rw-r--r--net/ipv4/inet_timewait_sock.c37
-rw-r--r--net/ipv4/ip_gre.c172
-rw-r--r--net/ipv4/ip_sockglue.c20
-rw-r--r--net/ipv4/ip_tunnel.c20
-rw-r--r--net/ipv4/ip_vti.c2
-rw-r--r--net/ipv4/ipconfig.c48
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/netfilter.c62
-rw-r--r--net/ipv4/netfilter/Kconfig13
-rw-r--r--net/ipv4/netfilter/Makefile9
-rw-r--r--net/ipv4/netfilter/arp_tables.c35
-rw-r--r--net/ipv4/netfilter/ip_tables.c35
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c20
-rw-r--r--net/ipv4/netfilter/iptable_filter.c6
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c5
-rw-r--r--net/ipv4/netfilter/iptable_nat.c4
-rw-r--r--net/ipv4/netfilter/iptable_raw.c37
-rw-r--r--net/ipv4/netfilter/iptable_security.c6
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c13
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c4
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c284
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.asn1177
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c1286
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic_main.c235
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c62
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c83
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c8
-rw-r--r--net/ipv4/proc.c3
-rw-r--r--net/ipv4/raw.c33
-rw-r--r--net/ipv4/route.c6
-rw-r--r--net/ipv4/tcp.c81
-rw-r--r--net/ipv4/tcp_bbr.c21
-rw-r--r--net/ipv4/tcp_diag.c2
-rw-r--r--net/ipv4/tcp_fastopen.c30
-rw-r--r--net/ipv4/tcp_input.c80
-rw-r--r--net/ipv4/tcp_ipv4.c66
-rw-r--r--net/ipv4/tcp_metrics.c7
-rw-r--r--net/ipv4/tcp_minisocks.c13
-rw-r--r--net/ipv4/tcp_nv.c4
-rw-r--r--net/ipv4/tcp_offload.c3
-rw-r--r--net/ipv4/tcp_output.c18
-rw-r--r--net/ipv4/tcp_probe.c301
-rw-r--r--net/ipv4/tcp_rate.c10
-rw-r--r--net/ipv4/tcp_recovery.c28
-rw-r--r--net/ipv4/tcp_timer.c41
-rw-r--r--net/ipv4/udp.c66
-rw-r--r--net/ipv4/udp_offload.c3
-rw-r--r--net/ipv4/udplite.c1
-rw-r--r--net/ipv4/xfrm4_input.c12
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c8
-rw-r--r--net/ipv6/addrconf.c90
-rw-r--r--net/ipv6/addrlabel.c25
-rw-r--r--net/ipv6/af_inet6.c12
-rw-r--r--net/ipv6/anycast.c1
-rw-r--r--net/ipv6/datagram.c3
-rw-r--r--net/ipv6/esp6.c39
-rw-r--r--net/ipv6/esp6_offload.c84
-rw-r--r--net/ipv6/exthdrs.c9
-rw-r--r--net/ipv6/ila/ila_xlat.c4
-rw-r--r--net/ipv6/inet6_hashtables.c77
-rw-r--r--net/ipv6/ip6_fib.c149
-rw-r--r--net/ipv6/ip6_flowlabel.c1
-rw-r--r--net/ipv6/ip6_gre.c708
-rw-r--r--net/ipv6/ip6_output.c37
-rw-r--r--net/ipv6/ip6_tunnel.c37
-rw-r--r--net/ipv6/ip6_vti.c22
-rw-r--r--net/ipv6/ip6mr.c12
-rw-r--r--net/ipv6/ipv6_sockglue.c20
-rw-r--r--net/ipv6/mcast.c29
-rw-r--r--net/ipv6/ndisc.c5
-rw-r--r--net/ipv6/netfilter.c44
-rw-r--r--net/ipv6/netfilter/Kconfig18
-rw-r--r--net/ipv6/netfilter/Makefile4
-rw-r--r--net/ipv6/netfilter/ip6_tables.c35
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c8
-rw-r--r--net/ipv6/netfilter/ip6t_srh.c161
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c8
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c4
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c31
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c25
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c4
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c15
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c3
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c277
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c8
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c82
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c12
-rw-r--r--net/ipv6/proc.c3
-rw-r--r--net/ipv6/raw.c1
-rw-r--r--net/ipv6/route.c581
-rw-r--r--net/ipv6/seg6.c4
-rw-r--r--net/ipv6/seg6_local.c2
-rw-r--r--net/ipv6/sit.c5
-rw-r--r--net/ipv6/tcp_ipv6.c19
-rw-r--r--net/ipv6/tcpv6_offload.c3
-rw-r--r--net/ipv6/udp.c55
-rw-r--r--net/ipv6/udp_offload.c3
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_input.c10
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c8
-rw-r--r--net/ipv6/xfrm6_policy.c2
-rw-r--r--net/ipx/Kconfig60
-rw-r--r--net/ipx/Makefile8
-rw-r--r--net/ipx/af_ipx.c2084
-rw-r--r--net/ipx/ipx_proc.c341
-rw-r--r--net/ipx/ipx_route.c293
-rw-r--r--net/ipx/pe2.c36
-rw-r--r--net/ipx/sysctl_net_ipx.c40
-rw-r--r--net/iucv/af_iucv.c6
-rw-r--r--net/kcm/kcmproc.c2
-rw-r--r--net/kcm/kcmsock.c93
-rw-r--r--net/key/af_key.c12
-rw-r--r--net/l2tp/l2tp_core.c54
-rw-r--r--net/l2tp/l2tp_core.h16
-rw-r--r--net/l2tp/l2tp_debugfs.c4
-rw-r--r--net/l2tp/l2tp_netlink.c39
-rw-r--r--net/l2tp/l2tp_ppp.c1
-rw-r--r--net/llc/llc_proc.c2
-rw-r--r--net/mac80211/agg-rx.c26
-rw-r--r--net/mac80211/agg-tx.c34
-rw-r--r--net/mac80211/cfg.c31
-rw-r--r--net/mac80211/debugfs.c1
-rw-r--r--net/mac80211/debugfs_sta.c4
-rw-r--r--net/mac80211/driver-ops.h3
-rw-r--r--net/mac80211/ht.c8
-rw-r--r--net/mac80211/ieee80211_i.h4
-rw-r--r--net/mac80211/iface.c4
-rw-r--r--net/mac80211/key.c12
-rw-r--r--net/mac80211/main.c3
-rw-r--r--net/mac80211/mesh.c2
-rw-r--r--net/mac80211/mesh_hwmp.c16
-rw-r--r--net/mac80211/mesh_pathtbl.c34
-rw-r--r--net/mac80211/mesh_plink.c2
-rw-r--r--net/mac80211/mlme.c12
-rw-r--r--net/mac80211/offchannel.c4
-rw-r--r--net/mac80211/rx.c19
-rw-r--r--net/mac80211/tdls.c6
-rw-r--r--net/mac80211/tx.c33
-rw-r--r--net/mac80211/util.c19
-rw-r--r--net/mac80211/wme.c1
-rw-r--r--net/mac80211/wpa.c16
-rw-r--r--net/mpls/af_mpls.c15
-rw-r--r--net/ncsi/ncsi-aen.c35
-rw-r--r--net/netfilter/Kconfig33
-rw-r--r--net/netfilter/Makefile9
-rw-r--r--net/netfilter/core.c263
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h10
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c8
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c8
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c8
-rw-r--r--net/netfilter/ipset/ip_set_core.c36
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h38
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c26
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c9
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c9
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c28
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c19
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c35
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c21
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c9
-rw-r--r--net/netfilter/nf_conncount.c373
-rw-r--r--net/netfilter/nf_conntrack_core.c28
-rw-r--r--net/netfilter/nf_conntrack_expect.c1
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c168
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c77
-rw-r--r--net/netfilter/nf_conntrack_netlink.c35
-rw-r--r--net/netfilter/nf_conntrack_proto.c18
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c21
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c21
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c28
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c10
-rw-r--r--net/netfilter/nf_conntrack_standalone.c14
-rw-r--r--net/netfilter/nf_flow_table.c429
-rw-r--r--net/netfilter/nf_flow_table_inet.c48
-rw-r--r--net/netfilter/nf_internals.h2
-rw-r--r--net/netfilter/nf_log.c1
-rw-r--r--net/netfilter/nf_queue.c96
-rw-r--r--net/netfilter/nf_synproxy_core.c1
-rw-r--r--net/netfilter/nf_tables_api.c1659
-rw-r--r--net/netfilter/nf_tables_inet.c88
-rw-r--r--net/netfilter/nf_tables_netdev.c87
-rw-r--r--net/netfilter/nfnetlink.c4
-rw-r--r--net/netfilter/nfnetlink_acct.c2
-rw-r--r--net/netfilter/nfnetlink_cthelper.c10
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c2
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nfnetlink_queue.c15
-rw-r--r--net/netfilter/nft_cmp.c2
-rw-r--r--net/netfilter/nft_compat.c26
-rw-r--r--net/netfilter/nft_ct.c16
-rw-r--r--net/netfilter/nft_dynset.c4
-rw-r--r--net/netfilter/nft_exthdr.c2
-rw-r--r--net/netfilter/nft_flow_offload.c264
-rw-r--r--net/netfilter/nft_log.c4
-rw-r--r--net/netfilter/nft_masq.c2
-rw-r--r--net/netfilter/nft_meta.c45
-rw-r--r--net/netfilter/nft_nat.c2
-rw-r--r--net/netfilter/nft_redir.c2
-rw-r--r--net/netfilter/nft_rt.c15
-rw-r--r--net/netfilter/nft_set_hash.c10
-rw-r--r--net/netfilter/utils.c90
-rw-r--r--net/netfilter/x_tables.c59
-rw-r--r--net/netfilter/xt_IDLETIMER.c1
-rw-r--r--net/netfilter/xt_LED.c1
-rw-r--r--net/netfilter/xt_TCPMSS.c5
-rw-r--r--net/netfilter/xt_addrtype.c15
-rw-r--r--net/netfilter/xt_bpf.c20
-rw-r--r--net/netfilter/xt_connlimit.c369
-rw-r--r--net/netfilter/xt_hashlimit.c5
-rw-r--r--net/netfilter/xt_ipcomp.c2
-rw-r--r--net/netfilter/xt_limit.c3
-rw-r--r--net/netfilter/xt_nfacct.c1
-rw-r--r--net/netfilter/xt_osf.c7
-rw-r--r--net/netfilter/xt_policy.c3
-rw-r--r--net/netfilter/xt_set.c119
-rw-r--r--net/netfilter/xt_statistic.c1
-rw-r--r--net/netlink/af_netlink.c78
-rw-r--r--net/netlink/diag.c8
-rw-r--r--net/netrom/af_netrom.c1
-rw-r--r--net/netrom/nr_route.c2
-rw-r--r--net/nfc/llcp_sock.c6
-rw-r--r--net/nfc/nci/uart.c2
-rw-r--r--net/openvswitch/conntrack.c38
-rw-r--r--net/openvswitch/datapath.c2
-rw-r--r--net/openvswitch/flow.c21
-rw-r--r--net/openvswitch/flow_netlink.c63
-rw-r--r--net/openvswitch/meter.c2
-rw-r--r--net/openvswitch/vport-internal_dev.c10
-rw-r--r--net/packet/af_packet.c45
-rw-r--r--net/packet/internal.h1
-rw-r--r--net/phonet/pn_netlink.c21
-rw-r--r--net/phonet/socket.c6
-rw-r--r--net/qrtr/qrtr.c8
-rw-r--r--net/rds/af_rds.c4
-rw-r--r--net/rds/bind.c1
-rw-r--r--net/rds/cong.c10
-rw-r--r--net/rds/connection.c27
-rw-r--r--net/rds/ib.c6
-rw-r--r--net/rds/rdma.c6
-rw-r--r--net/rds/rds.h10
-rw-r--r--net/rds/send.c40
-rw-r--r--net/rds/tcp.c86
-rw-r--r--net/rds/tcp.h3
-rw-r--r--net/rds/tcp_connect.c2
-rw-r--r--net/rds/tcp_recv.c8
-rw-r--r--net/rds/tcp_send.c9
-rw-r--r--net/rds/threads.c20
-rw-r--r--net/rfkill/core.c4
-rw-r--r--net/rose/af_rose.c1
-rw-r--r--net/rose/rose_route.c3
-rw-r--r--net/rxrpc/af_rxrpc.c28
-rw-r--r--net/rxrpc/ar-internal.h103
-rw-r--r--net/rxrpc/call_accept.c2
-rw-r--r--net/rxrpc/call_event.c229
-rw-r--r--net/rxrpc/call_object.c62
-rw-r--r--net/rxrpc/conn_client.c54
-rw-r--r--net/rxrpc/conn_event.c124
-rw-r--r--net/rxrpc/conn_object.c76
-rw-r--r--net/rxrpc/input.c76
-rw-r--r--net/rxrpc/misc.c19
-rw-r--r--net/rxrpc/net_ns.c33
-rw-r--r--net/rxrpc/output.c43
-rw-r--r--net/rxrpc/proc.c2
-rw-r--r--net/rxrpc/recvmsg.c12
-rw-r--r--net/rxrpc/sendmsg.c126
-rw-r--r--net/rxrpc/sysctl.c60
-rw-r--r--net/sched/Kconfig3
-rw-r--r--net/sched/act_api.c2
-rw-r--r--net/sched/act_bpf.c10
-rw-r--r--net/sched/act_connmark.c8
-rw-r--r--net/sched/act_csum.c74
-rw-r--r--net/sched/act_gact.c10
-rw-r--r--net/sched/act_ife.c18
-rw-r--r--net/sched/act_ipt.c18
-rw-r--r--net/sched/act_meta_mark.c1
-rw-r--r--net/sched/act_meta_skbtcindex.c1
-rw-r--r--net/sched/act_mirred.c29
-rw-r--r--net/sched/act_nat.c8
-rw-r--r--net/sched/act_pedit.c10
-rw-r--r--net/sched/act_police.c12
-rw-r--r--net/sched/act_sample.c22
-rw-r--r--net/sched/act_simple.c10
-rw-r--r--net/sched/act_skbedit.c8
-rw-r--r--net/sched/act_skbmod.c10
-rw-r--r--net/sched/act_tunnel_key.c10
-rw-r--r--net/sched/act_vlan.c10
-rw-r--r--net/sched/cls_api.c695
-rw-r--r--net/sched/cls_basic.c16
-rw-r--r--net/sched/cls_bpf.c167
-rw-r--r--net/sched/cls_cgroup.c12
-rw-r--r--net/sched/cls_flow.c12
-rw-r--r--net/sched/cls_flower.c54
-rw-r--r--net/sched/cls_fw.c19
-rw-r--r--net/sched/cls_matchall.c35
-rw-r--r--net/sched/cls_route.c16
-rw-r--r--net/sched/cls_rsvp.h9
-rw-r--r--net/sched/cls_tcindex.c17
-rw-r--r--net/sched/cls_u32.c146
-rw-r--r--net/sched/em_nbyte.c2
-rw-r--r--net/sched/sch_api.c312
-rw-r--r--net/sched/sch_atm.c23
-rw-r--r--net/sched/sch_cbq.c85
-rw-r--r--net/sched/sch_cbs.c31
-rw-r--r--net/sched/sch_choke.c11
-rw-r--r--net/sched/sch_codel.c8
-rw-r--r--net/sched/sch_drr.c40
-rw-r--r--net/sched/sch_dsmark.c19
-rw-r--r--net/sched/sch_fifo.c11
-rw-r--r--net/sched/sch_fq.c8
-rw-r--r--net/sched/sch_fq_codel.c13
-rw-r--r--net/sched/sch_generic.c570
-rw-r--r--net/sched/sch_gred.c16
-rw-r--r--net/sched/sch_hfsc.c28
-rw-r--r--net/sched/sch_hhf.c8
-rw-r--r--net/sched/sch_htb.c29
-rw-r--r--net/sched/sch_ingress.c123
-rw-r--r--net/sched/sch_mq.c42
-rw-r--r--net/sched/sch_mqprio.c76
-rw-r--r--net/sched/sch_multiq.c19
-rw-r--r--net/sched/sch_netem.c10
-rw-r--r--net/sched/sch_pie.c8
-rw-r--r--net/sched/sch_plug.c6
-rw-r--r--net/sched/sch_prio.c81
-rw-r--r--net/sched/sch_qfq.c22
-rw-r--r--net/sched/sch_red.c52
-rw-r--r--net/sched/sch_sfb.c20
-rw-r--r--net/sched/sch_sfq.c12
-rw-r--r--net/sched/sch_tbf.c31
-rw-r--r--net/sched/sch_teql.c3
-rw-r--r--net/sctp/Kconfig12
-rw-r--r--net/sctp/Makefile5
-rw-r--r--net/sctp/associola.c2
-rw-r--r--net/sctp/chunk.c19
-rw-r--r--net/sctp/debug.c3
-rw-r--r--net/sctp/endpointola.c2
-rw-r--r--net/sctp/input.c28
-rw-r--r--net/sctp/ipv6.c1
-rw-r--r--net/sctp/offload.c3
-rw-r--r--net/sctp/output.c5
-rw-r--r--net/sctp/outqueue.c35
-rw-r--r--net/sctp/probe.c244
-rw-r--r--net/sctp/proc.c7
-rw-r--r--net/sctp/protocol.c1
-rw-r--r--net/sctp/sm_make_chunk.c72
-rw-r--r--net/sctp/sm_sideeffect.c51
-rw-r--r--net/sctp/sm_statefuns.c50
-rw-r--r--net/sctp/sm_statetable.c5
-rw-r--r--net/sctp/socket.c308
-rw-r--r--net/sctp/stream.c148
-rw-r--r--net/sctp/stream_interleave.c1334
-rw-r--r--net/sctp/stream_sched.c28
-rw-r--r--net/sctp/stream_sched_prio.c7
-rw-r--r--net/sctp/stream_sched_rr.c7
-rw-r--r--net/sctp/sysctl.c7
-rw-r--r--net/sctp/transport.c29
-rw-r--r--net/sctp/ulpevent.c15
-rw-r--r--net/sctp/ulpqueue.c47
-rw-r--r--net/smc/af_smc.c237
-rw-r--r--net/smc/smc.h5
-rw-r--r--net/smc/smc_cdc.c52
-rw-r--r--net/smc/smc_cdc.h1
-rw-r--r--net/smc/smc_clc.c102
-rw-r--r--net/smc/smc_clc.h34
-rw-r--r--net/smc/smc_close.c208
-rw-r--r--net/smc/smc_close.h2
-rw-r--r--net/smc/smc_core.c17
-rw-r--r--net/smc/smc_diag.c6
-rw-r--r--net/smc/smc_ib.c38
-rw-r--r--net/smc/smc_rx.c5
-rw-r--r--net/smc/smc_tx.c32
-rw-r--r--net/smc/smc_wr.c50
-rw-r--r--net/smc/smc_wr.h2
-rw-r--r--net/socket.c412
-rw-r--r--net/strparser/strparser.c2
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c5
-rw-r--r--net/sunrpc/cache.c10
-rw-r--r--net/sunrpc/clnt.c21
-rw-r--r--net/sunrpc/rpc_pipe.c4
-rw-r--r--net/sunrpc/sched.c26
-rw-r--r--net/sunrpc/svcauth_unix.c6
-rw-r--r--net/sunrpc/svcsock.c4
-rw-r--r--net/sunrpc/xprt.c30
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c78
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c157
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c329
-rw-r--r--net/sunrpc/xprtrdma/module.c12
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c168
-rw-r--r--net/sunrpc/xprtrdma/transport.c130
-rw-r--r--net/sunrpc/xprtrdma/verbs.c282
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h117
-rw-r--r--net/sunrpc/xprtsock.c38
-rw-r--r--net/tipc/bcast.c12
-rw-r--r--net/tipc/bearer.c5
-rw-r--r--net/tipc/core.h1
-rw-r--r--net/tipc/group.c390
-rw-r--r--net/tipc/group.h10
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/monitor.c6
-rw-r--r--net/tipc/msg.c51
-rw-r--r--net/tipc/msg.h3
-rw-r--r--net/tipc/name_table.c57
-rw-r--r--net/tipc/name_table.h9
-rw-r--r--net/tipc/node.c26
-rw-r--r--net/tipc/server.c83
-rw-r--r--net/tipc/server.h13
-rw-r--r--net/tipc/socket.c122
-rw-r--r--net/tipc/subscr.c35
-rw-r--r--net/tipc/subscr.h2
-rw-r--r--net/tipc/udp_media.c4
-rw-r--r--net/tls/tls_main.c17
-rw-r--r--net/tls/tls_sw.c26
-rw-r--r--net/unix/af_unix.c16
-rw-r--r--net/vmw_vsock/af_vsock.c6
-rw-r--r--net/vmw_vsock/hyperv_transport.c2
-rw-r--r--net/vmw_vsock/vmci_transport.c14
-rw-r--r--net/wireless/Kconfig7
-rw-r--r--net/wireless/Makefile39
-rw-r--r--net/wireless/certs/sforshee.hex86
-rw-r--r--net/wireless/certs/sforshee.x509bin680 -> 0 bytes
-rw-r--r--net/wireless/core.c8
-rw-r--r--net/wireless/core.h2
-rw-r--r--net/wireless/ibss.c5
-rw-r--r--net/wireless/mlme.c6
-rw-r--r--net/wireless/nl80211.c88
-rw-r--r--net/wireless/reg.c3
-rw-r--r--net/wireless/scan.c5
-rw-r--r--net/wireless/trace.h12
-rw-r--r--net/wireless/wext-compat.c3
-rw-r--r--net/wireless/wext-core.c13
-rw-r--r--net/wireless/wext-proc.c1
-rw-r--r--net/xfrm/xfrm_device.c200
-rw-r--r--net/xfrm/xfrm_input.c70
-rw-r--r--net/xfrm/xfrm_output.c2
-rw-r--r--net/xfrm/xfrm_policy.c161
-rw-r--r--net/xfrm/xfrm_proc.c1
-rw-r--r--net/xfrm/xfrm_replay.c5
-rw-r--r--net/xfrm/xfrm_state.c31
-rw-r--r--net/xfrm/xfrm_user.c44
626 files changed, 20666 insertions, 15849 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8dfdd94e430f..bad01b14a4ad 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
vlan_gvrp_uninit_applicant(real_dev);
}
- /* Take it out of our own structures, but be sure to interlock with
- * HW accelerating devices or SW vlan input packet processing if
- * VLAN is not 0 (leave it there for 802.1p).
- */
- if (vlan_id)
- vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+ vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 5f1446c9f098..a662ccc166df 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -80,7 +80,6 @@ static int vlan_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations vlan_fops = {
- .owner = THIS_MODULE,
.open = vlan_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -97,7 +96,6 @@ static int vlandev_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations vlandev_fops = {
- .owner = THIS_MODULE,
.open = vlandev_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 985046ae4231..d6f7f7cb79c4 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -228,32 +228,31 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
}
}
-static int
-p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
+static __poll_t
+p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
{
- int ret, n;
+ __poll_t ret, n;
struct p9_trans_fd *ts = NULL;
if (client && client->status == Connected)
ts = client->trans;
- if (!ts)
- return -EREMOTEIO;
+ if (!ts) {
+ if (err)
+ *err = -EREMOTEIO;
+ return POLLERR;
+ }
if (!ts->rd->f_op->poll)
- return -EIO;
-
- if (!ts->wr->f_op->poll)
- return -EIO;
-
- ret = ts->rd->f_op->poll(ts->rd, pt);
- if (ret < 0)
- return ret;
+ ret = DEFAULT_POLLMASK;
+ else
+ ret = ts->rd->f_op->poll(ts->rd, pt);
if (ts->rd != ts->wr) {
- n = ts->wr->f_op->poll(ts->wr, pt);
- if (n < 0)
- return n;
+ if (!ts->wr->f_op->poll)
+ n = DEFAULT_POLLMASK;
+ else
+ n = ts->wr->f_op->poll(ts->wr, pt);
ret = (ret & ~POLLOUT) | (n & ~POLLIN);
}
@@ -298,7 +297,8 @@ static int p9_fd_read(struct p9_client *client, void *v, int len)
static void p9_read_work(struct work_struct *work)
{
- int n, err;
+ __poll_t n;
+ int err;
struct p9_conn *m;
int status = REQ_STATUS_ERROR;
@@ -398,7 +398,7 @@ end_clear:
if (test_and_clear_bit(Rpending, &m->wsched))
n = POLLIN;
else
- n = p9_fd_poll(m->client, NULL);
+ n = p9_fd_poll(m->client, NULL, NULL);
if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
@@ -448,7 +448,8 @@ static int p9_fd_write(struct p9_client *client, void *v, int len)
static void p9_write_work(struct work_struct *work)
{
- int n, err;
+ __poll_t n;
+ int err;
struct p9_conn *m;
struct p9_req_t *req;
@@ -506,7 +507,7 @@ end_clear:
if (test_and_clear_bit(Wpending, &m->wsched))
n = POLLOUT;
else
- n = p9_fd_poll(m->client, NULL);
+ n = p9_fd_poll(m->client, NULL, NULL);
if ((n & POLLOUT) &&
!test_and_set_bit(Wworksched, &m->wsched)) {
@@ -581,7 +582,7 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
static void p9_conn_create(struct p9_client *client)
{
- int n;
+ __poll_t n;
struct p9_trans_fd *ts = client->trans;
struct p9_conn *m = &ts->conn;
@@ -597,7 +598,7 @@ static void p9_conn_create(struct p9_client *client)
INIT_LIST_HEAD(&m->poll_pending_link);
init_poll_funcptr(&m->pt, p9_pollwait);
- n = p9_fd_poll(client, &m->pt);
+ n = p9_fd_poll(client, &m->pt, NULL);
if (n & POLLIN) {
p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
set_bit(Rpending, &m->wsched);
@@ -617,17 +618,16 @@ static void p9_conn_create(struct p9_client *client)
static void p9_poll_mux(struct p9_conn *m)
{
- int n;
+ __poll_t n;
+ int err = -ECONNRESET;
if (m->err < 0)
return;
- n = p9_fd_poll(m->client, NULL);
- if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
+ n = p9_fd_poll(m->client, NULL, &err);
+ if (n & (POLLERR | POLLHUP | POLLNVAL)) {
p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
- if (n >= 0)
- n = -ECONNRESET;
- p9_conn_cancel(m, n);
+ p9_conn_cancel(m, err);
}
if (n & POLLIN) {
@@ -663,7 +663,7 @@ static void p9_poll_mux(struct p9_conn *m)
static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
{
- int n;
+ __poll_t n;
struct p9_trans_fd *ts = client->trans;
struct p9_conn *m = &ts->conn;
@@ -680,7 +680,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
if (test_and_clear_bit(Wpending, &m->wsched))
n = POLLOUT;
else
- n = p9_fd_poll(m->client, NULL);
+ n = p9_fd_poll(m->client, NULL, NULL);
if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
schedule_work(&m->wq);
@@ -839,7 +839,6 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
if (IS_ERR(file)) {
pr_err("%s (%d): failed to map fd\n",
__func__, task_pid_nr(current));
- sock_release(csocket);
kfree(p);
return PTR_ERR(file);
}
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 325c56043007..086a4abdfa7c 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -543,3 +543,7 @@ static void p9_trans_xen_exit(void)
return xenbus_unregister_driver(&xen_9pfs_front_driver);
}
module_exit(p9_trans_xen_exit);
+
+MODULE_AUTHOR("Stefano Stabellini <stefano@aporeto.com>");
+MODULE_DESCRIPTION("Xen Transport for 9P");
+MODULE_LICENSE("GPL");
diff --git a/net/Kconfig b/net/Kconfig
index 9dba2715919d..0428f12c25c2 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -182,6 +182,7 @@ config BRIDGE_NETFILTER
depends on BRIDGE
depends on NETFILTER && INET
depends on NETFILTER_ADVANCED
+ select NETFILTER_FAMILY_BRIDGE
default m
---help---
Enabling this option will let arptables resp. iptables see bridged
@@ -212,7 +213,6 @@ source "net/dsa/Kconfig"
source "net/8021q/Kconfig"
source "net/decnet/Kconfig"
source "net/llc/Kconfig"
-source "net/ipx/Kconfig"
source "drivers/net/appletalk/Kconfig"
source "net/x25/Kconfig"
source "net/lapb/Kconfig"
@@ -336,23 +336,6 @@ config NET_PKTGEN
To compile this code as a module, choose M here: the
module will be called pktgen.
-config NET_TCPPROBE
- tristate "TCP connection probing"
- depends on INET && PROC_FS && KPROBES
- ---help---
- This module allows for capturing the changes to TCP connection
- state in response to incoming packets. It is used for debugging
- TCP congestion avoidance modules. If you don't understand
- what was just said, you don't need it: say N.
-
- Documentation on how to use TCP connection probing can be found
- at:
-
- http://www.linuxfoundation.org/collaborate/workgroups/networking/tcpprobe
-
- To compile this code as a module, choose M here: the
- module will be called tcp_probe.
-
config NET_DROP_MONITOR
tristate "Network packet drop alerting service"
depends on INET && TRACEPOINTS
diff --git a/net/Makefile b/net/Makefile
index 14fede520840..a6147c61b174 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -24,7 +24,6 @@ obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/
obj-$(CONFIG_NET_DSA) += dsa/
-obj-$(CONFIG_IPX) += ipx/
obj-$(CONFIG_ATALK) += appletalk/
obj-$(CONFIG_X25) += x25/
obj-$(CONFIG_LAPB) += lapb/
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 309d7dbb36e8..d4c1021e74e1 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -1047,7 +1047,6 @@ static int aarp_seq_open(struct inode *inode, struct file *file)
}
const struct file_operations atalk_seq_arp_fops = {
- .owner = THIS_MODULE,
.open = aarp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index af46bc49e1e9..a3bf9d519193 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -226,7 +226,6 @@ static int atalk_seq_socket_open(struct inode *inode, struct file *file)
}
static const struct file_operations atalk_seq_interface_fops = {
- .owner = THIS_MODULE,
.open = atalk_seq_interface_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -234,7 +233,6 @@ static const struct file_operations atalk_seq_interface_fops = {
};
static const struct file_operations atalk_seq_route_fops = {
- .owner = THIS_MODULE,
.open = atalk_seq_route_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -242,7 +240,6 @@ static const struct file_operations atalk_seq_route_fops = {
};
static const struct file_operations atalk_seq_socket_fops = {
- .owner = THIS_MODULE,
.open = atalk_seq_socket_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 4e111196f902..fd94bea36ee8 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -824,7 +824,6 @@ static int br2684_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations br2684_proc_ops = {
- .owner = THIS_MODULE,
.open = br2684_proc_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/atm/common.c b/net/atm/common.c
index 8a4f99114cd2..6523f38c4957 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -14,7 +14,7 @@
#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
-#include <linux/time.h> /* struct timeval */
+#include <linux/time64.h> /* 64-bit time for seconds */
#include <linux/skbuff.h>
#include <linux/bitops.h>
#include <linux/init.h>
@@ -648,11 +648,11 @@ out:
return error;
}
-unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
struct atm_vcc *vcc;
- unsigned int mask;
+ __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
diff --git a/net/atm/common.h b/net/atm/common.h
index d9d583712a91..5850649068bb 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
-unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 6676e3433261..09a1f056712a 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -992,7 +992,6 @@ static int lec_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations lec_seq_fops = {
- .owner = THIS_MODULE,
.open = lec_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 7c6a1cc760a2..31e0dcb970f8 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1089,7 +1089,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc)
msg->type = SND_MPOA_RES_RQST;
msg->content.in_info = entry->ctrl_info;
msg_to_mpoad(msg, mpc);
- do_gettimeofday(&(entry->reply_wait));
+ entry->reply_wait = ktime_get_seconds();
mpc->in_ops->put(entry);
return;
}
@@ -1099,7 +1099,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc)
msg->type = SND_MPOA_RES_RQST;
msg->content.in_info = entry->ctrl_info;
msg_to_mpoad(msg, mpc);
- do_gettimeofday(&(entry->reply_wait));
+ entry->reply_wait = ktime_get_seconds();
mpc->in_ops->put(entry);
return;
}
@@ -1175,8 +1175,9 @@ static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc)
}
entry->ctrl_info = msg->content.in_info;
- do_gettimeofday(&(entry->tv));
- do_gettimeofday(&(entry->reply_wait)); /* Used in refreshing func from now on */
+ entry->time = ktime_get_seconds();
+ /* Used in refreshing func from now on */
+ entry->reply_wait = ktime_get_seconds();
entry->refresh_time = 0;
ddprintk_cont("entry->shortcut = %p\n", entry->shortcut);
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index e01450bb32d6..4bb418313720 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -117,7 +117,7 @@ static in_cache_entry *in_cache_add_entry(__be32 dst_ip,
memcpy(entry->MPS_ctrl_ATM_addr, client->mps_ctrl_addr, ATM_ESA_LEN);
entry->ctrl_info.in_dst_ip = dst_ip;
- do_gettimeofday(&(entry->tv));
+ entry->time = ktime_get_seconds();
entry->retry_time = client->parameters.mpc_p4;
entry->count = 1;
entry->entry_state = INGRESS_INVALID;
@@ -148,7 +148,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc)
if (qos != NULL)
msg.qos = qos->qos;
msg_to_mpoad(&msg, mpc);
- do_gettimeofday(&(entry->reply_wait));
+ entry->reply_wait = ktime_get_seconds();
entry->entry_state = INGRESS_RESOLVING;
}
if (entry->shortcut != NULL)
@@ -171,7 +171,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc)
if (qos != NULL)
msg.qos = qos->qos;
msg_to_mpoad(&msg, mpc);
- do_gettimeofday(&(entry->reply_wait));
+ entry->reply_wait = ktime_get_seconds();
}
return CLOSED;
@@ -227,17 +227,16 @@ static void in_cache_remove_entry(in_cache_entry *entry,
static void clear_count_and_expired(struct mpoa_client *client)
{
in_cache_entry *entry, *next_entry;
- struct timeval now;
+ time64_t now;
- do_gettimeofday(&now);
+ now = ktime_get_seconds();
write_lock_bh(&client->ingress_lock);
entry = client->in_cache;
while (entry != NULL) {
entry->count = 0;
next_entry = entry->next;
- if ((now.tv_sec - entry->tv.tv_sec)
- > entry->ctrl_info.holding_time) {
+ if ((now - entry->time) > entry->ctrl_info.holding_time) {
dprintk("holding time expired, ip = %pI4\n",
&entry->ctrl_info.in_dst_ip);
client->in_ops->remove_entry(entry, client);
@@ -253,35 +252,35 @@ static void check_resolving_entries(struct mpoa_client *client)
struct atm_mpoa_qos *qos;
in_cache_entry *entry;
- struct timeval now;
+ time64_t now;
struct k_message msg;
- do_gettimeofday(&now);
+ now = ktime_get_seconds();
read_lock_bh(&client->ingress_lock);
entry = client->in_cache;
while (entry != NULL) {
if (entry->entry_state == INGRESS_RESOLVING) {
- if ((now.tv_sec - entry->hold_down.tv_sec) <
- client->parameters.mpc_p6) {
+
+ if ((now - entry->hold_down)
+ < client->parameters.mpc_p6) {
entry = entry->next; /* Entry in hold down */
continue;
}
- if ((now.tv_sec - entry->reply_wait.tv_sec) >
- entry->retry_time) {
+ if ((now - entry->reply_wait) > entry->retry_time) {
entry->retry_time = MPC_C1 * (entry->retry_time);
/*
* Retry time maximum exceeded,
* put entry in hold down.
*/
if (entry->retry_time > client->parameters.mpc_p5) {
- do_gettimeofday(&(entry->hold_down));
+ entry->hold_down = ktime_get_seconds();
entry->retry_time = client->parameters.mpc_p4;
entry = entry->next;
continue;
}
/* Ask daemon to send a resolution request. */
- memset(&(entry->hold_down), 0, sizeof(struct timeval));
+ memset(&entry->hold_down, 0, sizeof(time64_t));
msg.type = SND_MPOA_RES_RTRY;
memcpy(msg.MPS_ctrl, client->mps_ctrl_addr, ATM_ESA_LEN);
msg.content.in_info = entry->ctrl_info;
@@ -289,7 +288,7 @@ static void check_resolving_entries(struct mpoa_client *client)
if (qos != NULL)
msg.qos = qos->qos;
msg_to_mpoad(&msg, client);
- do_gettimeofday(&(entry->reply_wait));
+ entry->reply_wait = ktime_get_seconds();
}
}
entry = entry->next;
@@ -300,18 +299,18 @@ static void check_resolving_entries(struct mpoa_client *client)
/* Call this every MPC-p5 seconds. */
static void refresh_entries(struct mpoa_client *client)
{
- struct timeval now;
+ time64_t now;
struct in_cache_entry *entry = client->in_cache;
ddprintk("refresh_entries\n");
- do_gettimeofday(&now);
+ now = ktime_get_seconds();
read_lock_bh(&client->ingress_lock);
while (entry != NULL) {
if (entry->entry_state == INGRESS_RESOLVED) {
if (!(entry->refresh_time))
entry->refresh_time = (2 * (entry->ctrl_info.holding_time))/3;
- if ((now.tv_sec - entry->reply_wait.tv_sec) >
+ if ((now - entry->reply_wait) >
entry->refresh_time) {
dprintk("refreshing an entry.\n");
entry->entry_state = INGRESS_REFRESHING;
@@ -480,7 +479,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg,
memcpy(entry->MPS_ctrl_ATM_addr, client->mps_ctrl_addr, ATM_ESA_LEN);
entry->ctrl_info = msg->content.eg_info;
- do_gettimeofday(&(entry->tv));
+ entry->time = ktime_get_seconds();
entry->entry_state = EGRESS_RESOLVED;
dprintk("new_eg_cache_entry cache_id %u\n",
ntohl(entry->ctrl_info.cache_id));
@@ -495,7 +494,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg,
static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time)
{
- do_gettimeofday(&(entry->tv));
+ entry->time = ktime_get_seconds();
entry->entry_state = EGRESS_RESOLVED;
entry->ctrl_info.holding_time = holding_time;
}
@@ -503,17 +502,16 @@ static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time)
static void clear_expired(struct mpoa_client *client)
{
eg_cache_entry *entry, *next_entry;
- struct timeval now;
+ time64_t now;
struct k_message msg;
- do_gettimeofday(&now);
+ now = ktime_get_seconds();
write_lock_irq(&client->egress_lock);
entry = client->eg_cache;
while (entry != NULL) {
next_entry = entry->next;
- if ((now.tv_sec - entry->tv.tv_sec)
- > entry->ctrl_info.holding_time) {
+ if ((now - entry->time) > entry->ctrl_info.holding_time) {
msg.type = SND_EGRESS_PURGE;
msg.content.eg_info = entry->ctrl_info;
dprintk("egress_cache: holding time expired, cache_id = %u.\n",
diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h
index 6a266669ebf4..464c4c7f8d1f 100644
--- a/net/atm/mpoa_caches.h
+++ b/net/atm/mpoa_caches.h
@@ -2,6 +2,7 @@
#ifndef MPOA_CACHES_H
#define MPOA_CACHES_H
+#include <linux/time64.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <linux/atm.h>
@@ -16,9 +17,9 @@ void atm_mpoa_init_cache(struct mpoa_client *mpc);
typedef struct in_cache_entry {
struct in_cache_entry *next;
struct in_cache_entry *prev;
- struct timeval tv;
- struct timeval reply_wait;
- struct timeval hold_down;
+ time64_t time;
+ time64_t reply_wait;
+ time64_t hold_down;
uint32_t packets_fwded;
uint16_t entry_state;
uint32_t retry_time;
@@ -53,7 +54,7 @@ struct in_cache_ops{
typedef struct eg_cache_entry{
struct eg_cache_entry *next;
struct eg_cache_entry *prev;
- struct timeval tv;
+ time64_t time;
uint8_t MPS_ctrl_ATM_addr[ATM_ESA_LEN];
struct atm_vcc *shortcut;
uint32_t packets_rcvd;
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 8a0c17e1c203..b93cc0f18292 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -8,7 +8,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
-#include <linux/time.h>
+#include <linux/ktime.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/atmmpc.h>
@@ -57,7 +57,6 @@ static int parse_qos(const char *buff);
* Define allowed FILE OPERATIONS
*/
static const struct file_operations mpc_file_operations = {
- .owner = THIS_MODULE,
.open = proc_mpc_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -138,7 +137,7 @@ static int mpc_show(struct seq_file *m, void *v)
int i;
in_cache_entry *in_entry;
eg_cache_entry *eg_entry;
- struct timeval now;
+ time64_t now;
unsigned char ip_string[16];
if (v == SEQ_START_TOKEN) {
@@ -148,15 +147,17 @@ static int mpc_show(struct seq_file *m, void *v)
seq_printf(m, "\nInterface %d:\n\n", mpc->dev_num);
seq_printf(m, "Ingress Entries:\nIP address State Holding time Packets fwded VPI VCI\n");
- do_gettimeofday(&now);
+ now = ktime_get_seconds();
for (in_entry = mpc->in_cache; in_entry; in_entry = in_entry->next) {
+ unsigned long seconds_delta = now - in_entry->time;
+
sprintf(ip_string, "%pI4", &in_entry->ctrl_info.in_dst_ip);
seq_printf(m, "%-16s%s%-14lu%-12u",
ip_string,
ingress_state_string(in_entry->entry_state),
in_entry->ctrl_info.holding_time -
- (now.tv_sec-in_entry->tv.tv_sec),
+ seconds_delta,
in_entry->packets_fwded);
if (in_entry->shortcut)
seq_printf(m, " %-3d %-3d",
@@ -169,13 +170,14 @@ static int mpc_show(struct seq_file *m, void *v)
seq_printf(m, "Egress Entries:\nIngress MPC ATM addr\nCache-id State Holding time Packets recvd Latest IP addr VPI VCI\n");
for (eg_entry = mpc->eg_cache; eg_entry; eg_entry = eg_entry->next) {
unsigned char *p = eg_entry->ctrl_info.in_MPC_data_ATM_addr;
+ unsigned long seconds_delta = now - eg_entry->time;
+
for (i = 0; i < ATM_ESA_LEN; i++)
seq_printf(m, "%02x", p[i]);
seq_printf(m, "\n%-16lu%s%-14lu%-15u",
(unsigned long)ntohl(eg_entry->ctrl_info.cache_id),
egress_state_string(eg_entry->entry_state),
- (eg_entry->ctrl_info.holding_time -
- (now.tv_sec-eg_entry->tv.tv_sec)),
+ (eg_entry->ctrl_info.holding_time - seconds_delta),
eg_entry->packets_rcvd);
/* latest IP address */
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 642f9272ab95..edc48edc95c1 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -37,7 +37,6 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
size_t count, loff_t *pos);
static const struct file_operations proc_atm_dev_ops = {
- .owner = THIS_MODULE,
.read = proc_dev_atm_read,
.llseek = noop_llseek,
};
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 06eac1f50c5e..47fdd399626b 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1931,7 +1931,6 @@ static int ax25_info_open(struct inode *inode, struct file *file)
}
static const struct file_operations ax25_info_fops = {
- .owner = THIS_MODULE,
.open = ax25_info_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 0446b892618a..525558972fd9 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -336,7 +336,6 @@ static int ax25_rt_info_open(struct inode *inode, struct file *file)
}
const struct file_operations ax25_route_fops = {
- .owner = THIS_MODULE,
.open = ax25_rt_info_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 83b035f56202..4ebe91ba317a 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -194,7 +194,6 @@ static int ax25_uid_info_open(struct inode *inode, struct file *file)
}
const struct file_operations ax25_uid_fops = {
- .owner = THIS_MODULE,
.open = ax25_uid_info_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index b73b96a2854b..c44f6515be5e 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -1,3 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+#
+# Marek Lindner, Simon Wunderlich
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of version 2 of the GNU General Public
+# License as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+
#
# B.A.T.M.A.N meshing protocol
#
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 915987bc6d29..022f6e77307b 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,4 +1,4 @@
-#
+# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 44fd073b7546..80c72c7d3cad 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -37,7 +38,8 @@ char batadv_routing_algo[20] = "BATMAN_IV";
static struct hlist_head batadv_algo_list;
/**
- * batadv_algo_init - Initialize batman-adv algorithm management data structures
+ * batadv_algo_init() - Initialize batman-adv algorithm management data
+ * structures
*/
void batadv_algo_init(void)
{
@@ -59,6 +61,12 @@ static struct batadv_algo_ops *batadv_algo_get(char *name)
return bat_algo_ops;
}
+/**
+ * batadv_algo_register() - Register callbacks for a mesh algorithm
+ * @bat_algo_ops: mesh algorithm callbacks to add
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
{
struct batadv_algo_ops *bat_algo_ops_tmp;
@@ -88,6 +96,19 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
return 0;
}
+/**
+ * batadv_algo_select() - Select algorithm of soft interface
+ * @bat_priv: the bat priv with all the soft interface information
+ * @name: name of the algorithm to select
+ *
+ * The algorithm callbacks for the soft interface will be set when the algorithm
+ * with the correct name was found. Any previous selected algorithm will not be
+ * deinitialized and the new selected algorithm will also not be initialized.
+ * It is therefore not allowed to call batadv_algo_select outside the creation
+ * function of the soft interface.
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
{
struct batadv_algo_ops *bat_algo_ops;
@@ -102,6 +123,14 @@ int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
}
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+
+/**
+ * batadv_algo_seq_print_text() - Print the supported algorithms in a seq file
+ * @seq: seq file to print on
+ * @offset: not used
+ *
+ * Return: always 0
+ */
int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
{
struct batadv_algo_ops *bat_algo_ops;
@@ -148,7 +177,7 @@ module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
0644);
/**
- * batadv_algo_dump_entry - fill in information about one supported routing
+ * batadv_algo_dump_entry() - fill in information about one supported routing
* algorithm
* @msg: netlink message to be sent back
* @portid: Port to reply to
@@ -179,7 +208,7 @@ static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_algo_dump - fill in information about supported routing
+ * batadv_algo_dump() - fill in information about supported routing
* algorithms
* @msg: netlink message to be sent back
* @cb: Parameters to the netlink request
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 29f6312f9bf1..029221615ba3 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 1b659ab652fb..79e326383726 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -26,7 +27,7 @@
#include <linux/cache.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/jiffies.h>
@@ -51,6 +52,7 @@
#include <linux/workqueue.h>
#include <net/genetlink.h>
#include <net/netlink.h>
+#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
#include "bat_algo.h"
@@ -62,7 +64,6 @@
#include "netlink.h"
#include "network-coding.h"
#include "originator.h"
-#include "packet.h"
#include "routing.h"
#include "send.h"
#include "translation-table.h"
@@ -72,21 +73,28 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work);
/**
* enum batadv_dup_status - duplicate status
- * @BATADV_NO_DUP: the packet is no duplicate
- * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
- * neighbor)
- * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
- * @BATADV_PROTECTED: originator is currently protected (after reboot)
*/
enum batadv_dup_status {
+ /** @BATADV_NO_DUP: the packet is no duplicate */
BATADV_NO_DUP = 0,
+
+ /**
+ * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for
+ * the neighbor)
+ */
BATADV_ORIG_DUP,
+
+ /** @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor */
BATADV_NEIGH_DUP,
+
+ /**
+ * @BATADV_PROTECTED: originator is currently protected (after reboot)
+ */
BATADV_PROTECTED,
};
/**
- * batadv_ring_buffer_set - update the ring buffer with the given value
+ * batadv_ring_buffer_set() - update the ring buffer with the given value
* @lq_recv: pointer to the ring buffer
* @lq_index: index to store the value at
* @value: value to store in the ring buffer
@@ -98,7 +106,7 @@ static void batadv_ring_buffer_set(u8 lq_recv[], u8 *lq_index, u8 value)
}
/**
- * batadv_ring_buffer_avg - compute the average of all non-zero values stored
+ * batadv_ring_buffer_avg() - compute the average of all non-zero values stored
* in the given ring buffer
* @lq_recv: pointer to the ring buffer
*
@@ -130,7 +138,7 @@ static u8 batadv_ring_buffer_avg(const u8 lq_recv[])
}
/**
- * batadv_iv_ogm_orig_free - free the private resources allocated for this
+ * batadv_iv_ogm_orig_free() - free the private resources allocated for this
* orig_node
* @orig_node: the orig_node for which the resources have to be free'd
*/
@@ -141,8 +149,8 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
}
/**
- * batadv_iv_ogm_orig_add_if - change the private structures of the orig_node to
- * include the new hard-interface
+ * batadv_iv_ogm_orig_add_if() - change the private structures of the orig_node
+ * to include the new hard-interface
* @orig_node: the orig_node that has to be changed
* @max_if_num: the current amount of interfaces
*
@@ -186,7 +194,7 @@ unlock:
}
/**
- * batadv_iv_ogm_drop_bcast_own_entry - drop section of bcast_own
+ * batadv_iv_ogm_drop_bcast_own_entry() - drop section of bcast_own
* @orig_node: the orig_node that has to be changed
* @max_if_num: the current amount of interfaces
* @del_if_num: the index of the interface being removed
@@ -224,7 +232,7 @@ batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
}
/**
- * batadv_iv_ogm_drop_bcast_own_sum_entry - drop section of bcast_own_sum
+ * batadv_iv_ogm_drop_bcast_own_sum_entry() - drop section of bcast_own_sum
* @orig_node: the orig_node that has to be changed
* @max_if_num: the current amount of interfaces
* @del_if_num: the index of the interface being removed
@@ -259,8 +267,8 @@ batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
}
/**
- * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to
- * exclude the removed interface
+ * batadv_iv_ogm_orig_del_if() - change the private structures of the orig_node
+ * to exclude the removed interface
* @orig_node: the orig_node that has to be changed
* @max_if_num: the current amount of interfaces
* @del_if_num: the index of the interface being removed
@@ -290,7 +298,8 @@ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
}
/**
- * batadv_iv_ogm_orig_get - retrieve or create (if does not exist) an originator
+ * batadv_iv_ogm_orig_get() - retrieve or create (if does not exist) an
+ * originator
* @bat_priv: the bat priv with all the soft interface information
* @addr: mac address of the originator
*
@@ -447,7 +456,7 @@ static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv)
}
/**
- * batadv_iv_ogm_aggr_packet - checks if there is another OGM attached
+ * batadv_iv_ogm_aggr_packet() - checks if there is another OGM attached
* @buff_pos: current position in the skb
* @packet_len: total length of the skb
* @tvlv_len: tvlv length of the previously considered OGM
@@ -557,7 +566,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
}
/**
- * batadv_iv_ogm_can_aggregate - find out if an OGM can be aggregated on an
+ * batadv_iv_ogm_can_aggregate() - find out if an OGM can be aggregated on an
* existing forward packet
* @new_bat_ogm_packet: OGM packet to be aggregated
* @bat_priv: the bat priv with all the soft interface information
@@ -660,7 +669,7 @@ out:
}
/**
- * batadv_iv_ogm_aggregate_new - create a new aggregated packet and add this
+ * batadv_iv_ogm_aggregate_new() - create a new aggregated packet and add this
* packet to it.
* @packet_buff: pointer to the OGM
* @packet_len: (total) length of the OGM
@@ -743,7 +752,7 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
}
/**
- * batadv_iv_ogm_queue_add - queue up an OGM for transmission
+ * batadv_iv_ogm_queue_add() - queue up an OGM for transmission
* @bat_priv: the bat priv with all the soft interface information
* @packet_buff: pointer to the OGM
* @packet_len: (total) length of the OGM
@@ -869,8 +878,8 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
}
/**
- * batadv_iv_ogm_slide_own_bcast_window - bitshift own OGM broadcast windows for
- * the given interface
+ * batadv_iv_ogm_slide_own_bcast_window() - bitshift own OGM broadcast windows
+ * for the given interface
* @hard_iface: the interface for which the windows have to be shifted
*/
static void
@@ -987,7 +996,7 @@ out:
}
/**
- * batadv_iv_ogm_orig_update - use OGM to update corresponding data in an
+ * batadv_iv_ogm_orig_update() - use OGM to update corresponding data in an
* originator
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: the orig node who originally emitted the ogm packet
@@ -1152,7 +1161,7 @@ out:
}
/**
- * batadv_iv_ogm_calc_tq - calculate tq for current received ogm packet
+ * batadv_iv_ogm_calc_tq() - calculate tq for current received ogm packet
* @orig_node: the orig node who originally emitted the ogm packet
* @orig_neigh_node: the orig node struct of the neighbor who sent the packet
* @batadv_ogm_packet: the ogm packet
@@ -1214,7 +1223,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
orig_node->last_seen = jiffies;
/* find packet count of corresponding one hop neighbor */
- spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
if_num = if_incoming->if_num;
orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
@@ -1224,7 +1233,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
} else {
neigh_rq_count = 0;
}
- spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
/* pay attention to not get a value bigger than 100 % */
if (orig_eq_count > neigh_rq_count)
@@ -1298,7 +1307,7 @@ out:
}
/**
- * batadv_iv_ogm_update_seqnos - process a batman packet for all interfaces,
+ * batadv_iv_ogm_update_seqnos() - process a batman packet for all interfaces,
* adjust the sequence number and find out whether it is a duplicate
* @ethhdr: ethernet header of the packet
* @batadv_ogm_packet: OGM packet to be considered
@@ -1401,7 +1410,8 @@ out:
}
/**
- * batadv_iv_ogm_process_per_outif - process a batman iv OGM for an outgoing if
+ * batadv_iv_ogm_process_per_outif() - process a batman iv OGM for an outgoing
+ * interface
* @skb: the skb containing the OGM
* @ogm_offset: offset from skb->data to start of ogm header
* @orig_node: the (cached) orig node for the originator of this OGM
@@ -1608,7 +1618,7 @@ out:
}
/**
- * batadv_iv_ogm_process - process an incoming batman iv OGM
+ * batadv_iv_ogm_process() - process an incoming batman iv OGM
* @skb: the skb containing the OGM
* @ogm_offset: offset to the OGM which should be processed (for aggregates)
* @if_incoming: the interface where this packet was receved
@@ -1861,7 +1871,7 @@ free_skb:
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
- * batadv_iv_ogm_orig_print_neigh - print neighbors for the originator table
+ * batadv_iv_ogm_orig_print_neigh() - print neighbors for the originator table
* @orig_node: the orig_node for which the neighbors are printed
* @if_outgoing: outgoing interface for these entries
* @seq: debugfs table seq_file struct
@@ -1890,7 +1900,7 @@ batadv_iv_ogm_orig_print_neigh(struct batadv_orig_node *orig_node,
}
/**
- * batadv_iv_ogm_orig_print - print the originator table
+ * batadv_iv_ogm_orig_print() - print the originator table
* @bat_priv: the bat priv with all the soft interface information
* @seq: debugfs table seq_file struct
* @if_outgoing: the outgoing interface for which this should be printed
@@ -1960,7 +1970,7 @@ next:
#endif
/**
- * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a
+ * batadv_iv_ogm_neigh_get_tq_avg() - Get the TQ average for a neighbour on a
* given outgoing interface.
* @neigh_node: Neighbour of interest
* @if_outgoing: Outgoing interface of interest
@@ -1986,7 +1996,7 @@ batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node,
}
/**
- * batadv_iv_ogm_orig_dump_subentry - Dump an originator subentry into a
+ * batadv_iv_ogm_orig_dump_subentry() - Dump an originator subentry into a
* message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
@@ -2048,7 +2058,7 @@ batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_iv_ogm_orig_dump_entry - Dump an originator entry into a message
+ * batadv_iv_ogm_orig_dump_entry() - Dump an originator entry into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -2110,7 +2120,7 @@ batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_iv_ogm_orig_dump_bucket - Dump an originator bucket into a
+ * batadv_iv_ogm_orig_dump_bucket() - Dump an originator bucket into a
* message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
@@ -2153,7 +2163,7 @@ batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_iv_ogm_orig_dump - Dump the originators into a message
+ * batadv_iv_ogm_orig_dump() - Dump the originators into a message
* @msg: Netlink message to dump into
* @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
@@ -2190,7 +2200,7 @@ batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
- * batadv_iv_hardif_neigh_print - print a single hop neighbour node
+ * batadv_iv_hardif_neigh_print() - print a single hop neighbour node
* @seq: neighbour table seq_file struct
* @hardif_neigh: hardif neighbour information
*/
@@ -2209,7 +2219,7 @@ batadv_iv_hardif_neigh_print(struct seq_file *seq,
}
/**
- * batadv_iv_ogm_neigh_print - print the single hop neighbour list
+ * batadv_iv_ogm_neigh_print() - print the single hop neighbour list
* @bat_priv: the bat priv with all the soft interface information
* @seq: neighbour table seq_file struct
*/
@@ -2242,7 +2252,7 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv,
#endif
/**
- * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors
+ * batadv_iv_ogm_neigh_diff() - calculate tq difference of two neighbors
* @neigh1: the first neighbor object of the comparison
* @if_outgoing1: outgoing interface for the first neighbor
* @neigh2: the second neighbor object of the comparison
@@ -2287,7 +2297,7 @@ out:
}
/**
- * batadv_iv_ogm_neigh_dump_neigh - Dump a neighbour into a netlink message
+ * batadv_iv_ogm_neigh_dump_neigh() - Dump a neighbour into a netlink message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -2326,7 +2336,7 @@ batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_iv_ogm_neigh_dump_hardif - Dump the neighbours of a hard interface
+ * batadv_iv_ogm_neigh_dump_hardif() - Dump the neighbours of a hard interface
* into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
@@ -2365,7 +2375,7 @@ batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_iv_ogm_neigh_dump - Dump the neighbours into a message
+ * batadv_iv_ogm_neigh_dump() - Dump the neighbours into a message
* @msg: Netlink message to dump into
* @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
@@ -2417,7 +2427,7 @@ batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
}
/**
- * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
+ * batadv_iv_ogm_neigh_cmp() - compare the metrics of two neighbors
* @neigh1: the first neighbor object of the comparison
* @if_outgoing1: outgoing interface for the first neighbor
* @neigh2: the second neighbor object of the comparison
@@ -2443,7 +2453,7 @@ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
}
/**
- * batadv_iv_ogm_neigh_is_sob - check if neigh1 is similarly good or better
+ * batadv_iv_ogm_neigh_is_sob() - check if neigh1 is similarly good or better
* than neigh2 from the metric prospective
* @neigh1: the first neighbor object of the comparison
* @if_outgoing1: outgoing interface for the first neighbor
@@ -2478,7 +2488,7 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_iv_init_sel_class - initialize GW selection class
+ * batadv_iv_init_sel_class() - initialize GW selection class
* @bat_priv: the bat priv with all the soft interface information
*/
static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv)
@@ -2703,7 +2713,7 @@ static void batadv_iv_gw_print(struct batadv_priv *bat_priv,
#endif
/**
- * batadv_iv_gw_dump_entry - Dump a gateway into a message
+ * batadv_iv_gw_dump_entry() - Dump a gateway into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -2774,7 +2784,7 @@ out:
}
/**
- * batadv_iv_gw_dump - Dump gateways into a message
+ * batadv_iv_gw_dump() - Dump gateways into a message
* @msg: Netlink message to dump into
* @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
@@ -2843,6 +2853,11 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
},
};
+/**
+ * batadv_iv_init() - B.A.T.M.A.N. IV initialization function
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int __init batadv_iv_init(void)
{
int ret;
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index ae2ab526bdb1..9dc0dd5c83df 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 341ceab8338d..27e165ac9302 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
@@ -36,6 +37,7 @@
#include <linux/workqueue.h>
#include <net/genetlink.h>
#include <net/netlink.h>
+#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
#include "bat_algo.h"
@@ -48,7 +50,6 @@
#include "log.h"
#include "netlink.h"
#include "originator.h"
-#include "packet.h"
struct sk_buff;
@@ -99,7 +100,7 @@ static void batadv_v_primary_iface_set(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_v_iface_update_mac - react to hard-interface MAC address change
+ * batadv_v_iface_update_mac() - react to hard-interface MAC address change
* @hard_iface: the modified interface
*
* If the modified interface is the primary one, update the originator
@@ -130,7 +131,7 @@ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
- * batadv_v_orig_print_neigh - print neighbors for the originator table
+ * batadv_v_orig_print_neigh() - print neighbors for the originator table
* @orig_node: the orig_node for which the neighbors are printed
* @if_outgoing: outgoing interface for these entries
* @seq: debugfs table seq_file struct
@@ -160,7 +161,7 @@ batadv_v_orig_print_neigh(struct batadv_orig_node *orig_node,
}
/**
- * batadv_v_hardif_neigh_print - print a single ELP neighbour node
+ * batadv_v_hardif_neigh_print() - print a single ELP neighbour node
* @seq: neighbour table seq_file struct
* @hardif_neigh: hardif neighbour information
*/
@@ -181,7 +182,7 @@ batadv_v_hardif_neigh_print(struct seq_file *seq,
}
/**
- * batadv_v_neigh_print - print the single hop neighbour list
+ * batadv_v_neigh_print() - print the single hop neighbour list
* @bat_priv: the bat priv with all the soft interface information
* @seq: neighbour table seq_file struct
*/
@@ -215,7 +216,7 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv,
#endif
/**
- * batadv_v_neigh_dump_neigh - Dump a neighbour into a message
+ * batadv_v_neigh_dump_neigh() - Dump a neighbour into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -258,7 +259,7 @@ batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_v_neigh_dump_hardif - Dump the neighbours of a hard interface into
+ * batadv_v_neigh_dump_hardif() - Dump the neighbours of a hard interface into
* a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
@@ -296,7 +297,7 @@ batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_v_neigh_dump - Dump the neighbours of a hard interface into a
+ * batadv_v_neigh_dump() - Dump the neighbours of a hard interface into a
* message
* @msg: Netlink message to dump into
* @cb: Control block containing additional options
@@ -348,7 +349,7 @@ batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
- * batadv_v_orig_print - print the originator table
+ * batadv_v_orig_print() - print the originator table
* @bat_priv: the bat priv with all the soft interface information
* @seq: debugfs table seq_file struct
* @if_outgoing: the outgoing interface for which this should be printed
@@ -416,8 +417,7 @@ next:
#endif
/**
- * batadv_v_orig_dump_subentry - Dump an originator subentry into a
- * message
+ * batadv_v_orig_dump_subentry() - Dump an originator subentry into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -483,7 +483,7 @@ batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_v_orig_dump_entry - Dump an originator entry into a message
+ * batadv_v_orig_dump_entry() - Dump an originator entry into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -536,8 +536,7 @@ batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_v_orig_dump_bucket - Dump an originator bucket into a
- * message
+ * batadv_v_orig_dump_bucket() - Dump an originator bucket into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -578,7 +577,7 @@ batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_v_orig_dump - Dump the originators into a message
+ * batadv_v_orig_dump() - Dump the originators into a message
* @msg: Netlink message to dump into
* @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
@@ -668,7 +667,7 @@ err_ifinfo1:
}
/**
- * batadv_v_init_sel_class - initialize GW selection class
+ * batadv_v_init_sel_class() - initialize GW selection class
* @bat_priv: the bat priv with all the soft interface information
*/
static void batadv_v_init_sel_class(struct batadv_priv *bat_priv)
@@ -704,7 +703,7 @@ static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff)
}
/**
- * batadv_v_gw_throughput_get - retrieve the GW-bandwidth for a given GW
+ * batadv_v_gw_throughput_get() - retrieve the GW-bandwidth for a given GW
* @gw_node: the GW to retrieve the metric for
* @bw: the pointer where the metric will be stored. The metric is computed as
* the minimum between the GW advertised throughput and the path throughput to
@@ -747,7 +746,7 @@ out:
}
/**
- * batadv_v_gw_get_best_gw_node - retrieve the best GW node
+ * batadv_v_gw_get_best_gw_node() - retrieve the best GW node
* @bat_priv: the bat priv with all the soft interface information
*
* Return: the GW node having the best GW-metric, NULL if no GW is known
@@ -785,7 +784,7 @@ next:
}
/**
- * batadv_v_gw_is_eligible - check if a originator would be selected as GW
+ * batadv_v_gw_is_eligible() - check if a originator would be selected as GW
* @bat_priv: the bat priv with all the soft interface information
* @curr_gw_orig: originator representing the currently selected GW
* @orig_node: the originator representing the new candidate
@@ -814,7 +813,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
}
orig_gw = batadv_gw_node_get(bat_priv, orig_node);
- if (!orig_node)
+ if (!orig_gw)
goto out;
if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
@@ -884,7 +883,7 @@ out:
}
/**
- * batadv_v_gw_print - print the gateway list
+ * batadv_v_gw_print() - print the gateway list
* @bat_priv: the bat priv with all the soft interface information
* @seq: gateway table seq_file struct
*/
@@ -913,7 +912,7 @@ static void batadv_v_gw_print(struct batadv_priv *bat_priv,
#endif
/**
- * batadv_v_gw_dump_entry - Dump a gateway into a message
+ * batadv_v_gw_dump_entry() - Dump a gateway into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -1004,7 +1003,7 @@ out:
}
/**
- * batadv_v_gw_dump - Dump gateways into a message
+ * batadv_v_gw_dump() - Dump gateways into a message
* @msg: Netlink message to dump into
* @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
@@ -1074,7 +1073,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
};
/**
- * batadv_v_hardif_init - initialize the algorithm specific fields in the
+ * batadv_v_hardif_init() - initialize the algorithm specific fields in the
* hard-interface object
* @hard_iface: the hard-interface to initialize
*/
@@ -1088,7 +1087,7 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_v_mesh_init - initialize the B.A.T.M.A.N. V private resources for a
+ * batadv_v_mesh_init() - initialize the B.A.T.M.A.N. V private resources for a
* mesh
* @bat_priv: the object representing the mesh interface to initialise
*
@@ -1106,7 +1105,7 @@ int batadv_v_mesh_init(struct batadv_priv *bat_priv)
}
/**
- * batadv_v_mesh_free - free the B.A.T.M.A.N. V private resources for a mesh
+ * batadv_v_mesh_free() - free the B.A.T.M.A.N. V private resources for a mesh
* @bat_priv: the object representing the mesh interface to free
*/
void batadv_v_mesh_free(struct batadv_priv *bat_priv)
@@ -1115,7 +1114,7 @@ void batadv_v_mesh_free(struct batadv_priv *bat_priv)
}
/**
- * batadv_v_init - B.A.T.M.A.N. V initialization function
+ * batadv_v_init() - B.A.T.M.A.N. V initialization function
*
* Description: Takes care of initializing all the subcomponents.
* It is invoked upon module load only.
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index dd7c4b647e6b..a17ab68bbce8 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 1de992c58b35..a83478c46597 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
@@ -24,7 +25,7 @@
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
@@ -41,18 +42,18 @@
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/cfg80211.h>
+#include <uapi/linux/batadv_packet.h>
#include "bat_algo.h"
#include "bat_v_ogm.h"
#include "hard-interface.h"
#include "log.h"
#include "originator.h"
-#include "packet.h"
#include "routing.h"
#include "send.h"
/**
- * batadv_v_elp_start_timer - restart timer for ELP periodic work
+ * batadv_v_elp_start_timer() - restart timer for ELP periodic work
* @hard_iface: the interface for which the timer has to be reset
*/
static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface)
@@ -67,7 +68,7 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_v_elp_get_throughput - get the throughput towards a neighbour
+ * batadv_v_elp_get_throughput() - get the throughput towards a neighbour
* @neigh: the neighbour for which the throughput has to be obtained
*
* Return: The throughput towards the given neighbour in multiples of 100kpbs
@@ -153,8 +154,8 @@ default_throughput:
}
/**
- * batadv_v_elp_throughput_metric_update - worker updating the throughput metric
- * of a single hop neighbour
+ * batadv_v_elp_throughput_metric_update() - worker updating the throughput
+ * metric of a single hop neighbour
* @work: the work queue item
*/
void batadv_v_elp_throughput_metric_update(struct work_struct *work)
@@ -177,7 +178,7 @@ void batadv_v_elp_throughput_metric_update(struct work_struct *work)
}
/**
- * batadv_v_elp_wifi_neigh_probe - send link probing packets to a neighbour
+ * batadv_v_elp_wifi_neigh_probe() - send link probing packets to a neighbour
* @neigh: the neighbour to probe
*
* Sends a predefined number of unicast wifi packets to a given neighbour in
@@ -240,7 +241,7 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
}
/**
- * batadv_v_elp_periodic_work - ELP periodic task per interface
+ * batadv_v_elp_periodic_work() - ELP periodic task per interface
* @work: work queue item
*
* Emits broadcast ELP message in regular intervals.
@@ -327,7 +328,7 @@ out:
}
/**
- * batadv_v_elp_iface_enable - setup the ELP interface private resources
+ * batadv_v_elp_iface_enable() - setup the ELP interface private resources
* @hard_iface: interface for which the data has to be prepared
*
* Return: 0 on success or a -ENOMEM in case of failure.
@@ -375,7 +376,7 @@ out:
}
/**
- * batadv_v_elp_iface_disable - release ELP interface private resources
+ * batadv_v_elp_iface_disable() - release ELP interface private resources
* @hard_iface: interface for which the resources have to be released
*/
void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface)
@@ -387,7 +388,7 @@ void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_v_elp_iface_activate - update the ELP buffer belonging to the given
+ * batadv_v_elp_iface_activate() - update the ELP buffer belonging to the given
* hard-interface
* @primary_iface: the new primary interface
* @hard_iface: interface holding the to-be-updated buffer
@@ -408,7 +409,7 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface,
}
/**
- * batadv_v_elp_primary_iface_set - change internal data to reflect the new
+ * batadv_v_elp_primary_iface_set() - change internal data to reflect the new
* primary interface
* @primary_iface: the new primary interface
*/
@@ -428,7 +429,7 @@ void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface)
}
/**
- * batadv_v_elp_neigh_update - update an ELP neighbour node
+ * batadv_v_elp_neigh_update() - update an ELP neighbour node
* @bat_priv: the bat priv with all the soft interface information
* @neigh_addr: the neighbour interface address
* @if_incoming: the interface the packet was received through
@@ -488,7 +489,7 @@ orig_free:
}
/**
- * batadv_v_elp_packet_recv - main ELP packet handler
+ * batadv_v_elp_packet_recv() - main ELP packet handler
* @skb: the received packet
* @if_incoming: the interface this packet was received through
*
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 376ead280ab9..5e39d0588a48 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index c251445a42a0..ba59b77c605d 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
@@ -22,7 +23,7 @@
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
@@ -38,20 +39,20 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/workqueue.h>
+#include <uapi/linux/batadv_packet.h>
#include "bat_algo.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
#include "originator.h"
-#include "packet.h"
#include "routing.h"
#include "send.h"
#include "translation-table.h"
#include "tvlv.h"
/**
- * batadv_v_ogm_orig_get - retrieve and possibly create an originator node
+ * batadv_v_ogm_orig_get() - retrieve and possibly create an originator node
* @bat_priv: the bat priv with all the soft interface information
* @addr: the address of the originator
*
@@ -88,7 +89,7 @@ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
}
/**
- * batadv_v_ogm_start_timer - restart the OGM sending timer
+ * batadv_v_ogm_start_timer() - restart the OGM sending timer
* @bat_priv: the bat priv with all the soft interface information
*/
static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv)
@@ -107,7 +108,7 @@ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv)
}
/**
- * batadv_v_ogm_send_to_if - send a batman ogm using a given interface
+ * batadv_v_ogm_send_to_if() - send a batman ogm using a given interface
* @skb: the OGM to send
* @hard_iface: the interface to use to send the OGM
*/
@@ -127,7 +128,7 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
}
/**
- * batadv_v_ogm_send - periodic worker broadcasting the own OGM
+ * batadv_v_ogm_send() - periodic worker broadcasting the own OGM
* @work: work queue item
*/
static void batadv_v_ogm_send(struct work_struct *work)
@@ -235,7 +236,7 @@ out:
}
/**
- * batadv_v_ogm_iface_enable - prepare an interface for B.A.T.M.A.N. V
+ * batadv_v_ogm_iface_enable() - prepare an interface for B.A.T.M.A.N. V
* @hard_iface: the interface to prepare
*
* Takes care of scheduling own OGM sending routine for this interface.
@@ -252,7 +253,7 @@ int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_v_ogm_primary_iface_set - set a new primary interface
+ * batadv_v_ogm_primary_iface_set() - set a new primary interface
* @primary_iface: the new primary interface
*/
void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface)
@@ -268,8 +269,8 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface)
}
/**
- * batadv_v_forward_penalty - apply a penalty to the throughput metric forwarded
- * with B.A.T.M.A.N. V OGMs
+ * batadv_v_forward_penalty() - apply a penalty to the throughput metric
+ * forwarded with B.A.T.M.A.N. V OGMs
* @bat_priv: the bat priv with all the soft interface information
* @if_incoming: the interface where the OGM has been received
* @if_outgoing: the interface where the OGM has to be forwarded to
@@ -314,7 +315,7 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
}
/**
- * batadv_v_ogm_forward - check conditions and forward an OGM to the given
+ * batadv_v_ogm_forward() - check conditions and forward an OGM to the given
* outgoing interface
* @bat_priv: the bat priv with all the soft interface information
* @ogm_received: previously received OGM to be forwarded
@@ -405,7 +406,7 @@ out:
}
/**
- * batadv_v_ogm_metric_update - update route metric based on OGM
+ * batadv_v_ogm_metric_update() - update route metric based on OGM
* @bat_priv: the bat priv with all the soft interface information
* @ogm2: OGM2 structure
* @orig_node: Originator structure for which the OGM has been received
@@ -490,7 +491,7 @@ out:
}
/**
- * batadv_v_ogm_route_update - update routes based on OGM
+ * batadv_v_ogm_route_update() - update routes based on OGM
* @bat_priv: the bat priv with all the soft interface information
* @ethhdr: the Ethernet header of the OGM2
* @ogm2: OGM2 structure
@@ -590,7 +591,7 @@ out:
}
/**
- * batadv_v_ogm_process_per_outif - process a batman v OGM for an outgoing if
+ * batadv_v_ogm_process_per_outif() - process a batman v OGM for an outgoing if
* @bat_priv: the bat priv with all the soft interface information
* @ethhdr: the Ethernet header of the OGM2
* @ogm2: OGM2 structure
@@ -639,7 +640,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
}
/**
- * batadv_v_ogm_aggr_packet - checks if there is another OGM aggregated
+ * batadv_v_ogm_aggr_packet() - checks if there is another OGM aggregated
* @buff_pos: current position in the skb
* @packet_len: total length of the skb
* @tvlv_len: tvlv length of the previously considered OGM
@@ -659,7 +660,7 @@ static bool batadv_v_ogm_aggr_packet(int buff_pos, int packet_len,
}
/**
- * batadv_v_ogm_process - process an incoming batman v OGM
+ * batadv_v_ogm_process() - process an incoming batman v OGM
* @skb: the skb containing the OGM
* @ogm_offset: offset to the OGM which should be processed (for aggregates)
* @if_incoming: the interface where this packet was receved
@@ -787,7 +788,7 @@ out:
}
/**
- * batadv_v_ogm_packet_recv - OGM2 receiving handler
+ * batadv_v_ogm_packet_recv() - OGM2 receiving handler
* @skb: the received OGM
* @if_incoming: the interface where this OGM has been received
*
@@ -851,7 +852,7 @@ free_skb:
}
/**
- * batadv_v_ogm_init - initialise the OGM2 engine
+ * batadv_v_ogm_init() - initialise the OGM2 engine
* @bat_priv: the bat priv with all the soft interface information
*
* Return: 0 on success or a negative error code in case of failure
@@ -884,7 +885,7 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv)
}
/**
- * batadv_v_ogm_free - free OGM private resources
+ * batadv_v_ogm_free() - free OGM private resources
* @bat_priv: the bat priv with all the soft interface information
*/
void batadv_v_ogm_free(struct batadv_priv *bat_priv)
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index 2068770b542d..6a4c14ccc3c6 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index 2b070c7e31da..bdc1ef06e05b 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
@@ -32,7 +33,7 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n)
}
/**
- * batadv_bit_get_packet - receive and process one packet within the sequence
+ * batadv_bit_get_packet() - receive and process one packet within the sequence
* number window
* @priv: the bat priv with all the soft interface information
* @seq_bits: pointer to the sequence number receive packet
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index cc262c9d97e0..ca9d0753dd6b 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
@@ -26,7 +27,7 @@
#include <linux/types.h>
/**
- * batadv_test_bit - check if bit is set in the current window
+ * batadv_test_bit() - check if bit is set in the current window
*
* @seq_bits: pointer to the sequence number receive packet
* @last_seqno: latest sequence number in seq_bits
@@ -46,7 +47,12 @@ static inline bool batadv_test_bit(const unsigned long *seq_bits,
return test_bit(diff, seq_bits) != 0;
}
-/* turn corresponding bit on, so we can remember that we got the packet */
+/**
+ * batadv_set_bit() - Turn corresponding bit on, so we can remember that we got
+ * the packet
+ * @seq_bits: bitmap of the packet receive window
+ * @n: relative sequence number of newly received packet
+ */
static inline void batadv_set_bit(unsigned long *seq_bits, s32 n)
{
/* if too old, just drop it */
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index cdd8e8e4df0b..fad47853ad3c 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
@@ -24,7 +25,7 @@
#include <linux/crc16.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
@@ -49,6 +50,7 @@
#include <net/genetlink.h>
#include <net/netlink.h>
#include <net/sock.h>
+#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
#include "hard-interface.h"
@@ -56,7 +58,6 @@
#include "log.h"
#include "netlink.h"
#include "originator.h"
-#include "packet.h"
#include "soft-interface.h"
#include "sysfs.h"
#include "translation-table.h"
@@ -69,7 +70,7 @@ batadv_bla_send_announce(struct batadv_priv *bat_priv,
struct batadv_bla_backbone_gw *backbone_gw);
/**
- * batadv_choose_claim - choose the right bucket for a claim.
+ * batadv_choose_claim() - choose the right bucket for a claim.
* @data: data to hash
* @size: size of the hash table
*
@@ -87,7 +88,7 @@ static inline u32 batadv_choose_claim(const void *data, u32 size)
}
/**
- * batadv_choose_backbone_gw - choose the right bucket for a backbone gateway.
+ * batadv_choose_backbone_gw() - choose the right bucket for a backbone gateway.
* @data: data to hash
* @size: size of the hash table
*
@@ -105,7 +106,7 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
}
/**
- * batadv_compare_backbone_gw - compare address and vid of two backbone gws
+ * batadv_compare_backbone_gw() - compare address and vid of two backbone gws
* @node: list node of the first entry to compare
* @data2: pointer to the second backbone gateway
*
@@ -129,7 +130,7 @@ static bool batadv_compare_backbone_gw(const struct hlist_node *node,
}
/**
- * batadv_compare_claim - compare address and vid of two claims
+ * batadv_compare_claim() - compare address and vid of two claims
* @node: list node of the first entry to compare
* @data2: pointer to the second claims
*
@@ -153,7 +154,7 @@ static bool batadv_compare_claim(const struct hlist_node *node,
}
/**
- * batadv_backbone_gw_release - release backbone gw from lists and queue for
+ * batadv_backbone_gw_release() - release backbone gw from lists and queue for
* free after rcu grace period
* @ref: kref pointer of the backbone gw
*/
@@ -168,7 +169,7 @@ static void batadv_backbone_gw_release(struct kref *ref)
}
/**
- * batadv_backbone_gw_put - decrement the backbone gw refcounter and possibly
+ * batadv_backbone_gw_put() - decrement the backbone gw refcounter and possibly
* release it
* @backbone_gw: backbone gateway to be free'd
*/
@@ -178,8 +179,8 @@ static void batadv_backbone_gw_put(struct batadv_bla_backbone_gw *backbone_gw)
}
/**
- * batadv_claim_release - release claim from lists and queue for free after rcu
- * grace period
+ * batadv_claim_release() - release claim from lists and queue for free after
+ * rcu grace period
* @ref: kref pointer of the claim
*/
static void batadv_claim_release(struct kref *ref)
@@ -204,8 +205,7 @@ static void batadv_claim_release(struct kref *ref)
}
/**
- * batadv_claim_put - decrement the claim refcounter and possibly
- * release it
+ * batadv_claim_put() - decrement the claim refcounter and possibly release it
* @claim: claim to be free'd
*/
static void batadv_claim_put(struct batadv_bla_claim *claim)
@@ -214,7 +214,7 @@ static void batadv_claim_put(struct batadv_bla_claim *claim)
}
/**
- * batadv_claim_hash_find - looks for a claim in the claim hash
+ * batadv_claim_hash_find() - looks for a claim in the claim hash
* @bat_priv: the bat priv with all the soft interface information
* @data: search data (may be local/static data)
*
@@ -253,7 +253,7 @@ batadv_claim_hash_find(struct batadv_priv *bat_priv,
}
/**
- * batadv_backbone_hash_find - looks for a backbone gateway in the hash
+ * batadv_backbone_hash_find() - looks for a backbone gateway in the hash
* @bat_priv: the bat priv with all the soft interface information
* @addr: the address of the originator
* @vid: the VLAN ID
@@ -297,7 +297,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
}
/**
- * batadv_bla_del_backbone_claims - delete all claims for a backbone
+ * batadv_bla_del_backbone_claims() - delete all claims for a backbone
* @backbone_gw: backbone gateway where the claims should be removed
*/
static void
@@ -337,7 +337,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
}
/**
- * batadv_bla_send_claim - sends a claim frame according to the provided info
+ * batadv_bla_send_claim() - sends a claim frame according to the provided info
* @bat_priv: the bat priv with all the soft interface information
* @mac: the mac address to be announced within the claim
* @vid: the VLAN ID
@@ -457,7 +457,7 @@ out:
}
/**
- * batadv_bla_loopdetect_report - worker for reporting the loop
+ * batadv_bla_loopdetect_report() - worker for reporting the loop
* @work: work queue item
*
* Throws an uevent, as the loopdetect check function can't do that itself
@@ -487,7 +487,7 @@ static void batadv_bla_loopdetect_report(struct work_struct *work)
}
/**
- * batadv_bla_get_backbone_gw - finds or creates a backbone gateway
+ * batadv_bla_get_backbone_gw() - finds or creates a backbone gateway
* @bat_priv: the bat priv with all the soft interface information
* @orig: the mac address of the originator
* @vid: the VLAN ID
@@ -560,7 +560,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
}
/**
- * batadv_bla_update_own_backbone_gw - updates the own backbone gw for a VLAN
+ * batadv_bla_update_own_backbone_gw() - updates the own backbone gw for a VLAN
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: the selected primary interface
* @vid: VLAN identifier
@@ -586,7 +586,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv,
}
/**
- * batadv_bla_answer_request - answer a bla request by sending own claims
+ * batadv_bla_answer_request() - answer a bla request by sending own claims
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: interface where the request came on
* @vid: the vid where the request came on
@@ -636,7 +636,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
}
/**
- * batadv_bla_send_request - send a request to repeat claims
+ * batadv_bla_send_request() - send a request to repeat claims
* @backbone_gw: the backbone gateway from whom we are out of sync
*
* When the crc is wrong, ask the backbone gateway for a full table update.
@@ -663,7 +663,7 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw)
}
/**
- * batadv_bla_send_announce - Send an announcement frame
+ * batadv_bla_send_announce() - Send an announcement frame
* @bat_priv: the bat priv with all the soft interface information
* @backbone_gw: our backbone gateway which should be announced
*/
@@ -684,7 +684,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
}
/**
- * batadv_bla_add_claim - Adds a claim in the claim hash
+ * batadv_bla_add_claim() - Adds a claim in the claim hash
* @bat_priv: the bat priv with all the soft interface information
* @mac: the mac address of the claim
* @vid: the VLAN ID of the frame
@@ -774,7 +774,7 @@ claim_free_ref:
}
/**
- * batadv_bla_claim_get_backbone_gw - Get valid reference for backbone_gw of
+ * batadv_bla_claim_get_backbone_gw() - Get valid reference for backbone_gw of
* claim
* @claim: claim whose backbone_gw should be returned
*
@@ -794,7 +794,7 @@ batadv_bla_claim_get_backbone_gw(struct batadv_bla_claim *claim)
}
/**
- * batadv_bla_del_claim - delete a claim from the claim hash
+ * batadv_bla_del_claim() - delete a claim from the claim hash
* @bat_priv: the bat priv with all the soft interface information
* @mac: mac address of the claim to be removed
* @vid: VLAN id for the claim to be removed
@@ -822,7 +822,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
}
/**
- * batadv_handle_announce - check for ANNOUNCE frame
+ * batadv_handle_announce() - check for ANNOUNCE frame
* @bat_priv: the bat priv with all the soft interface information
* @an_addr: announcement mac address (ARP Sender HW address)
* @backbone_addr: originator address of the sender (Ethernet source MAC)
@@ -880,7 +880,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
}
/**
- * batadv_handle_request - check for REQUEST frame
+ * batadv_handle_request() - check for REQUEST frame
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: the primary hard interface of this batman soft interface
* @backbone_addr: backbone address to be requested (ARP sender HW MAC)
@@ -913,7 +913,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv,
}
/**
- * batadv_handle_unclaim - check for UNCLAIM frame
+ * batadv_handle_unclaim() - check for UNCLAIM frame
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: the primary hard interface of this batman soft interface
* @backbone_addr: originator address of the backbone (Ethernet source)
@@ -951,7 +951,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv,
}
/**
- * batadv_handle_claim - check for CLAIM frame
+ * batadv_handle_claim() - check for CLAIM frame
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: the primary hard interface of this batman soft interface
* @backbone_addr: originator address of the backbone (Ethernet Source)
@@ -988,7 +988,7 @@ static bool batadv_handle_claim(struct batadv_priv *bat_priv,
}
/**
- * batadv_check_claim_group - check for claim group membership
+ * batadv_check_claim_group() - check for claim group membership
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: the primary interface of this batman interface
* @hw_src: the Hardware source in the ARP Header
@@ -1063,7 +1063,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
}
/**
- * batadv_bla_process_claim - Check if this is a claim frame, and process it
+ * batadv_bla_process_claim() - Check if this is a claim frame, and process it
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: the primary hard interface of this batman soft interface
* @skb: the frame to be checked
@@ -1205,7 +1205,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
}
/**
- * batadv_bla_purge_backbone_gw - Remove backbone gateways after a timeout or
+ * batadv_bla_purge_backbone_gw() - Remove backbone gateways after a timeout or
* immediately
* @bat_priv: the bat priv with all the soft interface information
* @now: whether the whole hash shall be wiped now
@@ -1258,7 +1258,7 @@ purge_now:
}
/**
- * batadv_bla_purge_claims - Remove claims after a timeout or immediately
+ * batadv_bla_purge_claims() - Remove claims after a timeout or immediately
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: the selected primary interface, may be NULL if now is set
* @now: whether the whole hash shall be wiped now
@@ -1316,7 +1316,7 @@ skip:
}
/**
- * batadv_bla_update_orig_address - Update the backbone gateways when the own
+ * batadv_bla_update_orig_address() - Update the backbone gateways when the own
* originator address changes
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: the new selected primary_if
@@ -1372,7 +1372,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
}
/**
- * batadv_bla_send_loopdetect - send a loopdetect frame
+ * batadv_bla_send_loopdetect() - send a loopdetect frame
* @bat_priv: the bat priv with all the soft interface information
* @backbone_gw: the backbone gateway for which a loop should be detected
*
@@ -1392,7 +1392,7 @@ batadv_bla_send_loopdetect(struct batadv_priv *bat_priv,
}
/**
- * batadv_bla_status_update - purge bla interfaces if necessary
+ * batadv_bla_status_update() - purge bla interfaces if necessary
* @net_dev: the soft interface net device
*/
void batadv_bla_status_update(struct net_device *net_dev)
@@ -1412,7 +1412,7 @@ void batadv_bla_status_update(struct net_device *net_dev)
}
/**
- * batadv_bla_periodic_work - performs periodic bla work
+ * batadv_bla_periodic_work() - performs periodic bla work
* @work: kernel work struct
*
* periodic work to do:
@@ -1517,7 +1517,7 @@ static struct lock_class_key batadv_claim_hash_lock_class_key;
static struct lock_class_key batadv_backbone_hash_lock_class_key;
/**
- * batadv_bla_init - initialize all bla structures
+ * batadv_bla_init() - initialize all bla structures
* @bat_priv: the bat priv with all the soft interface information
*
* Return: 0 on success, < 0 on error.
@@ -1579,7 +1579,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
}
/**
- * batadv_bla_check_bcast_duplist - Check if a frame is in the broadcast dup.
+ * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup.
* @bat_priv: the bat priv with all the soft interface information
* @skb: contains the bcast_packet to be checked
*
@@ -1652,7 +1652,7 @@ out:
}
/**
- * batadv_bla_is_backbone_gw_orig - Check if the originator is a gateway for
+ * batadv_bla_is_backbone_gw_orig() - Check if the originator is a gateway for
* the VLAN identified by vid.
* @bat_priv: the bat priv with all the soft interface information
* @orig: originator mac address
@@ -1692,7 +1692,7 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
}
/**
- * batadv_bla_is_backbone_gw - check if originator is a backbone gw for a VLAN.
+ * batadv_bla_is_backbone_gw() - check if originator is a backbone gw for a VLAN
* @skb: the frame to be checked
* @orig_node: the orig_node of the frame
* @hdr_size: maximum length of the frame
@@ -1726,7 +1726,7 @@ bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
}
/**
- * batadv_bla_free - free all bla structures
+ * batadv_bla_free() - free all bla structures
* @bat_priv: the bat priv with all the soft interface information
*
* for softinterface free or module unload
@@ -1753,7 +1753,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
}
/**
- * batadv_bla_loopdetect_check - check and handle a detected loop
+ * batadv_bla_loopdetect_check() - check and handle a detected loop
* @bat_priv: the bat priv with all the soft interface information
* @skb: the packet to check
* @primary_if: interface where the request came on
@@ -1802,7 +1802,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
}
/**
- * batadv_bla_rx - check packets coming from the mesh.
+ * batadv_bla_rx() - check packets coming from the mesh.
* @bat_priv: the bat priv with all the soft interface information
* @skb: the frame to be checked
* @vid: the VLAN ID of the frame
@@ -1914,7 +1914,7 @@ out:
}
/**
- * batadv_bla_tx - check packets going into the mesh
+ * batadv_bla_tx() - check packets going into the mesh
* @bat_priv: the bat priv with all the soft interface information
* @skb: the frame to be checked
* @vid: the VLAN ID of the frame
@@ -2022,7 +2022,7 @@ out:
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
- * batadv_bla_claim_table_seq_print_text - print the claim table in a seq file
+ * batadv_bla_claim_table_seq_print_text() - print the claim table in a seq file
* @seq: seq file to print on
* @offset: not used
*
@@ -2084,7 +2084,7 @@ out:
#endif
/**
- * batadv_bla_claim_dump_entry - dump one entry of the claim table
+ * batadv_bla_claim_dump_entry() - dump one entry of the claim table
* to a netlink socket
* @msg: buffer for the message
* @portid: netlink port
@@ -2143,7 +2143,7 @@ out:
}
/**
- * batadv_bla_claim_dump_bucket - dump one bucket of the claim table
+ * batadv_bla_claim_dump_bucket() - dump one bucket of the claim table
* to a netlink socket
* @msg: buffer for the message
* @portid: netlink port
@@ -2180,7 +2180,7 @@ unlock:
}
/**
- * batadv_bla_claim_dump - dump claim table to a netlink socket
+ * batadv_bla_claim_dump() - dump claim table to a netlink socket
* @msg: buffer for the message
* @cb: callback structure containing arguments
*
@@ -2247,8 +2247,8 @@ out:
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
- * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq
- * file
+ * batadv_bla_backbone_table_seq_print_text() - print the backbone table in a
+ * seq file
* @seq: seq file to print on
* @offset: not used
*
@@ -2312,8 +2312,8 @@ out:
#endif
/**
- * batadv_bla_backbone_dump_entry - dump one entry of the backbone table
- * to a netlink socket
+ * batadv_bla_backbone_dump_entry() - dump one entry of the backbone table to a
+ * netlink socket
* @msg: buffer for the message
* @portid: netlink port
* @seq: Sequence number of netlink message
@@ -2373,8 +2373,8 @@ out:
}
/**
- * batadv_bla_backbone_dump_bucket - dump one bucket of the backbone table
- * to a netlink socket
+ * batadv_bla_backbone_dump_bucket() - dump one bucket of the backbone table to
+ * a netlink socket
* @msg: buffer for the message
* @portid: netlink port
* @seq: Sequence number of netlink message
@@ -2410,7 +2410,7 @@ unlock:
}
/**
- * batadv_bla_backbone_dump - dump backbone table to a netlink socket
+ * batadv_bla_backbone_dump() - dump backbone table to a netlink socket
* @msg: buffer for the message
* @cb: callback structure containing arguments
*
@@ -2477,7 +2477,7 @@ out:
#ifdef CONFIG_BATMAN_ADV_DAT
/**
- * batadv_bla_check_claim - check if address is claimed
+ * batadv_bla_check_claim() - check if address is claimed
*
* @bat_priv: the bat priv with all the soft interface information
* @addr: mac address of which the claim status is checked
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 234775748b8e..b27571abcd2f 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
@@ -30,8 +31,8 @@ struct seq_file;
struct sk_buff;
/**
- * batadv_bla_is_loopdetect_mac - check if the mac address is from a loop detect
- * frame sent by bridge loop avoidance
+ * batadv_bla_is_loopdetect_mac() - check if the mac address is from a loop
+ * detect frame sent by bridge loop avoidance
* @mac: mac address to check
*
* Return: true if the it looks like a loop detect frame
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index e32ad47c6efd..21d1189957a7 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner
@@ -25,7 +26,6 @@
#include <linux/fs.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
-#include <linux/sched.h> /* for linux/wait.h */
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/stddef.h>
@@ -66,8 +66,8 @@ static int batadv_originators_open(struct inode *inode, struct file *file)
}
/**
- * batadv_originators_hardif_open - handles debugfs output for the
- * originator table of an hard interface
+ * batadv_originators_hardif_open() - handles debugfs output for the originator
+ * table of an hard interface
* @inode: inode pointer to debugfs file
* @file: pointer to the seq_file
*
@@ -117,7 +117,7 @@ static int batadv_bla_backbone_table_open(struct inode *inode,
#ifdef CONFIG_BATMAN_ADV_DAT
/**
- * batadv_dat_cache_open - Prepare file handler for reads from dat_chache
+ * batadv_dat_cache_open() - Prepare file handler for reads from dat_chache
* @inode: inode which was opened
* @file: file handle to be initialized
*
@@ -154,7 +154,7 @@ static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
#ifdef CONFIG_BATMAN_ADV_MCAST
/**
- * batadv_mcast_flags_open - prepare file handler for reads from mcast_flags
+ * batadv_mcast_flags_open() - prepare file handler for reads from mcast_flags
* @inode: inode which was opened
* @file: file handle to be initialized
*
@@ -259,6 +259,9 @@ static struct batadv_debuginfo *batadv_hardif_debuginfos[] = {
NULL,
};
+/**
+ * batadv_debugfs_init() - Initialize soft interface independent debugfs entries
+ */
void batadv_debugfs_init(void)
{
struct batadv_debuginfo **bat_debug;
@@ -289,6 +292,9 @@ err:
batadv_debugfs = NULL;
}
+/**
+ * batadv_debugfs_destroy() - Remove all debugfs entries
+ */
void batadv_debugfs_destroy(void)
{
debugfs_remove_recursive(batadv_debugfs);
@@ -296,7 +302,7 @@ void batadv_debugfs_destroy(void)
}
/**
- * batadv_debugfs_add_hardif - creates the base directory for a hard interface
+ * batadv_debugfs_add_hardif() - creates the base directory for a hard interface
* in debugfs.
* @hard_iface: hard interface which should be added.
*
@@ -338,7 +344,7 @@ out:
}
/**
- * batadv_debugfs_del_hardif - delete the base directory for a hard interface
+ * batadv_debugfs_del_hardif() - delete the base directory for a hard interface
* in debugfs.
* @hard_iface: hard interface which is deleted.
*/
@@ -355,6 +361,12 @@ void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface)
}
}
+/**
+ * batadv_debugfs_add_meshif() - Initialize interface dependent debugfs entries
+ * @dev: netdev struct of the soft interface
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_debugfs_add_meshif(struct net_device *dev)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
@@ -401,6 +413,10 @@ out:
return -ENOMEM;
}
+/**
+ * batadv_debugfs_del_meshif() - Remove interface dependent debugfs entries
+ * @dev: netdev struct of the soft interface
+ */
void batadv_debugfs_del_meshif(struct net_device *dev)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 9c5d4a65b98c..90a08d35c501 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 760c0de72582..9703c791ffc5 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
@@ -23,7 +24,7 @@
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
@@ -55,7 +56,7 @@
static void batadv_dat_purge(struct work_struct *work);
/**
- * batadv_dat_start_timer - initialise the DAT periodic worker
+ * batadv_dat_start_timer() - initialise the DAT periodic worker
* @bat_priv: the bat priv with all the soft interface information
*/
static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
@@ -66,7 +67,7 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
}
/**
- * batadv_dat_entry_release - release dat_entry from lists and queue for free
+ * batadv_dat_entry_release() - release dat_entry from lists and queue for free
* after rcu grace period
* @ref: kref pointer of the dat_entry
*/
@@ -80,7 +81,7 @@ static void batadv_dat_entry_release(struct kref *ref)
}
/**
- * batadv_dat_entry_put - decrement the dat_entry refcounter and possibly
+ * batadv_dat_entry_put() - decrement the dat_entry refcounter and possibly
* release it
* @dat_entry: dat_entry to be free'd
*/
@@ -90,7 +91,7 @@ static void batadv_dat_entry_put(struct batadv_dat_entry *dat_entry)
}
/**
- * batadv_dat_to_purge - check whether a dat_entry has to be purged or not
+ * batadv_dat_to_purge() - check whether a dat_entry has to be purged or not
* @dat_entry: the entry to check
*
* Return: true if the entry has to be purged now, false otherwise.
@@ -102,7 +103,7 @@ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry)
}
/**
- * __batadv_dat_purge - delete entries from the DAT local storage
+ * __batadv_dat_purge() - delete entries from the DAT local storage
* @bat_priv: the bat priv with all the soft interface information
* @to_purge: function in charge to decide whether an entry has to be purged or
* not. This function takes the dat_entry as argument and has to
@@ -145,8 +146,8 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv,
}
/**
- * batadv_dat_purge - periodic task that deletes old entries from the local DAT
- * hash table
+ * batadv_dat_purge() - periodic task that deletes old entries from the local
+ * DAT hash table
* @work: kernel work struct
*/
static void batadv_dat_purge(struct work_struct *work)
@@ -164,7 +165,7 @@ static void batadv_dat_purge(struct work_struct *work)
}
/**
- * batadv_compare_dat - comparing function used in the local DAT hash table
+ * batadv_compare_dat() - comparing function used in the local DAT hash table
* @node: node in the local table
* @data2: second object to compare the node to
*
@@ -179,7 +180,7 @@ static bool batadv_compare_dat(const struct hlist_node *node, const void *data2)
}
/**
- * batadv_arp_hw_src - extract the hw_src field from an ARP packet
+ * batadv_arp_hw_src() - extract the hw_src field from an ARP packet
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
*
@@ -196,7 +197,7 @@ static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
}
/**
- * batadv_arp_ip_src - extract the ip_src field from an ARP packet
+ * batadv_arp_ip_src() - extract the ip_src field from an ARP packet
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
*
@@ -208,7 +209,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
}
/**
- * batadv_arp_hw_dst - extract the hw_dst field from an ARP packet
+ * batadv_arp_hw_dst() - extract the hw_dst field from an ARP packet
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
*
@@ -220,7 +221,7 @@ static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
}
/**
- * batadv_arp_ip_dst - extract the ip_dst field from an ARP packet
+ * batadv_arp_ip_dst() - extract the ip_dst field from an ARP packet
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
*
@@ -232,7 +233,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
}
/**
- * batadv_hash_dat - compute the hash value for an IP address
+ * batadv_hash_dat() - compute the hash value for an IP address
* @data: data to hash
* @size: size of the hash table
*
@@ -267,7 +268,7 @@ static u32 batadv_hash_dat(const void *data, u32 size)
}
/**
- * batadv_dat_entry_hash_find - look for a given dat_entry in the local hash
+ * batadv_dat_entry_hash_find() - look for a given dat_entry in the local hash
* table
* @bat_priv: the bat priv with all the soft interface information
* @ip: search key
@@ -310,7 +311,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip,
}
/**
- * batadv_dat_entry_add - add a new dat entry or update it if already exists
+ * batadv_dat_entry_add() - add a new dat entry or update it if already exists
* @bat_priv: the bat priv with all the soft interface information
* @ip: ipv4 to add/edit
* @mac_addr: mac address to assign to the given ipv4
@@ -367,7 +368,8 @@ out:
#ifdef CONFIG_BATMAN_ADV_DEBUG
/**
- * batadv_dbg_arp - print a debug message containing all the ARP packet details
+ * batadv_dbg_arp() - print a debug message containing all the ARP packet
+ * details
* @bat_priv: the bat priv with all the soft interface information
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
@@ -448,7 +450,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
#endif /* CONFIG_BATMAN_ADV_DEBUG */
/**
- * batadv_is_orig_node_eligible - check whether a node can be a DHT candidate
+ * batadv_is_orig_node_eligible() - check whether a node can be a DHT candidate
* @res: the array with the already selected candidates
* @select: number of already selected candidates
* @tmp_max: address of the currently evaluated node
@@ -502,7 +504,7 @@ out:
}
/**
- * batadv_choose_next_candidate - select the next DHT candidate
+ * batadv_choose_next_candidate() - select the next DHT candidate
* @bat_priv: the bat priv with all the soft interface information
* @cands: candidates array
* @select: number of candidates already present in the array
@@ -566,8 +568,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
}
/**
- * batadv_dat_select_candidates - select the nodes which the DHT message has to
- * be sent to
+ * batadv_dat_select_candidates() - select the nodes which the DHT message has
+ * to be sent to
* @bat_priv: the bat priv with all the soft interface information
* @ip_dst: ipv4 to look up in the DHT
* @vid: VLAN identifier
@@ -612,7 +614,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
}
/**
- * batadv_dat_send_data - send a payload to the selected candidates
+ * batadv_dat_send_data() - send a payload to the selected candidates
* @bat_priv: the bat priv with all the soft interface information
* @skb: payload to send
* @ip: the DHT key
@@ -688,7 +690,7 @@ out:
}
/**
- * batadv_dat_tvlv_container_update - update the dat tvlv container after dat
+ * batadv_dat_tvlv_container_update() - update the dat tvlv container after dat
* setting change
* @bat_priv: the bat priv with all the soft interface information
*/
@@ -710,7 +712,7 @@ static void batadv_dat_tvlv_container_update(struct batadv_priv *bat_priv)
}
/**
- * batadv_dat_status_update - update the dat tvlv container after dat
+ * batadv_dat_status_update() - update the dat tvlv container after dat
* setting change
* @net_dev: the soft interface net device
*/
@@ -722,7 +724,7 @@ void batadv_dat_status_update(struct net_device *net_dev)
}
/**
- * batadv_dat_tvlv_ogm_handler_v1 - process incoming dat tvlv container
+ * batadv_dat_tvlv_ogm_handler_v1() - process incoming dat tvlv container
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
* @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
@@ -741,7 +743,7 @@ static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
}
/**
- * batadv_dat_hash_free - free the local DAT hash table
+ * batadv_dat_hash_free() - free the local DAT hash table
* @bat_priv: the bat priv with all the soft interface information
*/
static void batadv_dat_hash_free(struct batadv_priv *bat_priv)
@@ -757,7 +759,7 @@ static void batadv_dat_hash_free(struct batadv_priv *bat_priv)
}
/**
- * batadv_dat_init - initialise the DAT internals
+ * batadv_dat_init() - initialise the DAT internals
* @bat_priv: the bat priv with all the soft interface information
*
* Return: 0 in case of success, a negative error code otherwise
@@ -782,7 +784,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv)
}
/**
- * batadv_dat_free - free the DAT internals
+ * batadv_dat_free() - free the DAT internals
* @bat_priv: the bat priv with all the soft interface information
*/
void batadv_dat_free(struct batadv_priv *bat_priv)
@@ -797,7 +799,7 @@ void batadv_dat_free(struct batadv_priv *bat_priv)
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
- * batadv_dat_cache_seq_print_text - print the local DAT hash table
+ * batadv_dat_cache_seq_print_text() - print the local DAT hash table
* @seq: seq file to print on
* @offset: not used
*
@@ -850,7 +852,7 @@ out:
#endif
/**
- * batadv_arp_get_type - parse an ARP packet and gets the type
+ * batadv_arp_get_type() - parse an ARP packet and gets the type
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to analyse
* @hdr_size: size of the possible header before the ARP packet in the skb
@@ -924,7 +926,7 @@ out:
}
/**
- * batadv_dat_get_vid - extract the VLAN identifier from skb if any
+ * batadv_dat_get_vid() - extract the VLAN identifier from skb if any
* @skb: the buffer containing the packet to extract the VID from
* @hdr_size: the size of the batman-adv header encapsulating the packet
*
@@ -950,7 +952,7 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
}
/**
- * batadv_dat_arp_create_reply - create an ARP Reply
+ * batadv_dat_arp_create_reply() - create an ARP Reply
* @bat_priv: the bat priv with all the soft interface information
* @ip_src: ARP sender IP
* @ip_dst: ARP target IP
@@ -985,7 +987,7 @@ batadv_dat_arp_create_reply(struct batadv_priv *bat_priv, __be32 ip_src,
}
/**
- * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to
+ * batadv_dat_snoop_outgoing_arp_request() - snoop the ARP request and try to
* answer using DAT
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to check
@@ -1083,7 +1085,7 @@ out:
}
/**
- * batadv_dat_snoop_incoming_arp_request - snoop the ARP request and try to
+ * batadv_dat_snoop_incoming_arp_request() - snoop the ARP request and try to
* answer using the local DAT storage
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to check
@@ -1153,7 +1155,7 @@ out:
}
/**
- * batadv_dat_snoop_outgoing_arp_reply - snoop the ARP reply and fill the DHT
+ * batadv_dat_snoop_outgoing_arp_reply() - snoop the ARP reply and fill the DHT
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to check
*/
@@ -1193,8 +1195,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
}
/**
- * batadv_dat_snoop_incoming_arp_reply - snoop the ARP reply and fill the local
- * DAT storage only
+ * batadv_dat_snoop_incoming_arp_reply() - snoop the ARP reply and fill the
+ * local DAT storage only
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to check
* @hdr_size: size of the encapsulation header
@@ -1282,8 +1284,8 @@ out:
}
/**
- * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped
- * (because the node has already obtained the reply via DAT) or not
+ * batadv_dat_drop_broadcast_packet() - check if an ARP request has to be
+ * dropped (because the node has already obtained the reply via DAT) or not
* @bat_priv: the bat priv with all the soft interface information
* @forw_packet: the broadcast packet
*
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index ec364a3c1c66..12897eb46268 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
@@ -23,9 +24,9 @@
#include <linux/compiler.h>
#include <linux/netdevice.h>
#include <linux/types.h>
+#include <uapi/linux/batadv_packet.h>
#include "originator.h"
-#include "packet.h"
struct seq_file;
struct sk_buff;
@@ -48,7 +49,7 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
struct batadv_forw_packet *forw_packet);
/**
- * batadv_dat_init_orig_node_addr - assign a DAT address to the orig_node
+ * batadv_dat_init_orig_node_addr() - assign a DAT address to the orig_node
* @orig_node: the node to assign the DAT address to
*/
static inline void
@@ -61,7 +62,7 @@ batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node)
}
/**
- * batadv_dat_init_own_addr - assign a DAT address to the node itself
+ * batadv_dat_init_own_addr() - assign a DAT address to the node itself
* @bat_priv: the bat priv with all the soft interface information
* @primary_if: a pointer to the primary interface
*/
@@ -82,7 +83,7 @@ void batadv_dat_free(struct batadv_priv *bat_priv);
int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
/**
- * batadv_dat_inc_counter - increment the correct DAT packet counter
+ * batadv_dat_inc_counter() - increment the correct DAT packet counter
* @bat_priv: the bat priv with all the soft interface information
* @subtype: the 4addr subtype of the packet to be counted
*
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index a98cf1104a30..22dde42fd80e 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
@@ -22,7 +23,7 @@
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
@@ -32,16 +33,16 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
+#include <uapi/linux/batadv_packet.h>
#include "hard-interface.h"
#include "originator.h"
-#include "packet.h"
#include "routing.h"
#include "send.h"
#include "soft-interface.h"
/**
- * batadv_frag_clear_chain - delete entries in the fragment buffer chain
+ * batadv_frag_clear_chain() - delete entries in the fragment buffer chain
* @head: head of chain with entries.
* @dropped: whether the chain is cleared because all fragments are dropped
*
@@ -65,7 +66,7 @@ static void batadv_frag_clear_chain(struct hlist_head *head, bool dropped)
}
/**
- * batadv_frag_purge_orig - free fragments associated to an orig
+ * batadv_frag_purge_orig() - free fragments associated to an orig
* @orig_node: originator to free fragments from
* @check_cb: optional function to tell if an entry should be purged
*/
@@ -89,7 +90,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
}
/**
- * batadv_frag_size_limit - maximum possible size of packet to be fragmented
+ * batadv_frag_size_limit() - maximum possible size of packet to be fragmented
*
* Return: the maximum size of payload that can be fragmented.
*/
@@ -104,7 +105,7 @@ static int batadv_frag_size_limit(void)
}
/**
- * batadv_frag_init_chain - check and prepare fragment chain for new fragment
+ * batadv_frag_init_chain() - check and prepare fragment chain for new fragment
* @chain: chain in fragments table to init
* @seqno: sequence number of the received fragment
*
@@ -134,7 +135,7 @@ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
}
/**
- * batadv_frag_insert_packet - insert a fragment into a fragment chain
+ * batadv_frag_insert_packet() - insert a fragment into a fragment chain
* @orig_node: originator that the fragment was received from
* @skb: skb to insert
* @chain_out: list head to attach complete chains of fragments to
@@ -248,7 +249,7 @@ err:
}
/**
- * batadv_frag_merge_packets - merge a chain of fragments
+ * batadv_frag_merge_packets() - merge a chain of fragments
* @chain: head of chain with fragments
*
* Expand the first skb in the chain and copy the content of the remaining
@@ -306,7 +307,7 @@ free:
}
/**
- * batadv_frag_skb_buffer - buffer fragment for later merge
+ * batadv_frag_skb_buffer() - buffer fragment for later merge
* @skb: skb to buffer
* @orig_node_src: originator that the skb is received from
*
@@ -346,7 +347,7 @@ out_err:
}
/**
- * batadv_frag_skb_fwd - forward fragments that would exceed MTU when merged
+ * batadv_frag_skb_fwd() - forward fragments that would exceed MTU when merged
* @skb: skb to forward
* @recv_if: interface that the skb is received on
* @orig_node_src: originator that the skb is received from
@@ -400,7 +401,7 @@ out:
}
/**
- * batadv_frag_create - create a fragment from skb
+ * batadv_frag_create() - create a fragment from skb
* @skb: skb to create fragment from
* @frag_head: header to use in new fragment
* @fragment_size: size of new fragment
@@ -438,7 +439,7 @@ err:
}
/**
- * batadv_frag_send_packet - create up to 16 fragments from the passed skb
+ * batadv_frag_send_packet() - create up to 16 fragments from the passed skb
* @skb: skb to create fragments from
* @orig_node: final destination of the created fragments
* @neigh_node: next-hop of the created fragments
@@ -499,6 +500,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
*/
if (skb->priority >= 256 && skb->priority <= 263)
frag_header.priority = skb->priority - 256;
+ else
+ frag_header.priority = 0;
ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
ether_addr_copy(frag_header.dest, orig_node->orig);
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 1a2d6c308745..138b22a1836a 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
@@ -39,7 +40,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
struct batadv_neigh_node *neigh_node);
/**
- * batadv_frag_check_entry - check if a list of fragments has timed out
+ * batadv_frag_check_entry() - check if a list of fragments has timed out
* @frags_entry: table entry to check
*
* Return: true if the frags entry has timed out, false otherwise.
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 10d521f0b17f..37fe9a644f22 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner
@@ -22,7 +23,7 @@
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
@@ -42,6 +43,7 @@
#include <linux/stddef.h>
#include <linux/udp.h>
#include <net/sock.h>
+#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
#include "gateway_common.h"
@@ -49,7 +51,6 @@
#include "log.h"
#include "netlink.h"
#include "originator.h"
-#include "packet.h"
#include "routing.h"
#include "soft-interface.h"
#include "sysfs.h"
@@ -68,8 +69,8 @@
#define BATADV_DHCP_CHADDR_OFFSET 28
/**
- * batadv_gw_node_release - release gw_node from lists and queue for free after
- * rcu grace period
+ * batadv_gw_node_release() - release gw_node from lists and queue for free
+ * after rcu grace period
* @ref: kref pointer of the gw_node
*/
static void batadv_gw_node_release(struct kref *ref)
@@ -83,7 +84,8 @@ static void batadv_gw_node_release(struct kref *ref)
}
/**
- * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it
+ * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release
+ * it
* @gw_node: gateway node to free
*/
void batadv_gw_node_put(struct batadv_gw_node *gw_node)
@@ -91,6 +93,12 @@ void batadv_gw_node_put(struct batadv_gw_node *gw_node)
kref_put(&gw_node->refcount, batadv_gw_node_release);
}
+/**
+ * batadv_gw_get_selected_gw_node() - Get currently selected gateway
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: selected gateway (with increased refcnt), NULL on errors
+ */
struct batadv_gw_node *
batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv)
{
@@ -109,6 +117,12 @@ out:
return gw_node;
}
+/**
+ * batadv_gw_get_selected_orig() - Get originator of currently selected gateway
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: orig_node of selected gateway (with increased refcnt), NULL on errors
+ */
struct batadv_orig_node *
batadv_gw_get_selected_orig(struct batadv_priv *bat_priv)
{
@@ -155,7 +169,7 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
}
/**
- * batadv_gw_reselect - force a gateway reselection
+ * batadv_gw_reselect() - force a gateway reselection
* @bat_priv: the bat priv with all the soft interface information
*
* Set a flag to remind the GW component to perform a new gateway reselection.
@@ -171,7 +185,7 @@ void batadv_gw_reselect(struct batadv_priv *bat_priv)
}
/**
- * batadv_gw_check_client_stop - check if client mode has been switched off
+ * batadv_gw_check_client_stop() - check if client mode has been switched off
* @bat_priv: the bat priv with all the soft interface information
*
* This function assumes the caller has checked that the gw state *is actually
@@ -202,6 +216,10 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv)
batadv_gw_node_put(curr_gw);
}
+/**
+ * batadv_gw_election() - Elect the best gateway
+ * @bat_priv: the bat priv with all the soft interface information
+ */
void batadv_gw_election(struct batadv_priv *bat_priv)
{
struct batadv_gw_node *curr_gw = NULL;
@@ -290,6 +308,11 @@ out:
batadv_neigh_ifinfo_put(router_ifinfo);
}
+/**
+ * batadv_gw_check_election() - Elect orig node as best gateway when eligible
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: orig node which is to be checked
+ */
void batadv_gw_check_election(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node)
{
@@ -321,7 +344,7 @@ out:
}
/**
- * batadv_gw_node_add - add gateway node to list of available gateways
+ * batadv_gw_node_add() - add gateway node to list of available gateways
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: originator announcing gateway capabilities
* @gateway: announced bandwidth information
@@ -364,7 +387,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
}
/**
- * batadv_gw_node_get - retrieve gateway node from list of available gateways
+ * batadv_gw_node_get() - retrieve gateway node from list of available gateways
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: originator announcing gateway capabilities
*
@@ -393,7 +416,7 @@ struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
}
/**
- * batadv_gw_node_update - update list of available gateways with changed
+ * batadv_gw_node_update() - update list of available gateways with changed
* bandwidth information
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: originator announcing gateway capabilities
@@ -458,6 +481,11 @@ out:
batadv_gw_node_put(gw_node);
}
+/**
+ * batadv_gw_node_delete() - Remove orig_node from gateway list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: orig node which is currently in process of being removed
+ */
void batadv_gw_node_delete(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node)
{
@@ -469,6 +497,10 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv,
batadv_gw_node_update(bat_priv, orig_node, &gateway);
}
+/**
+ * batadv_gw_node_free() - Free gateway information from soft interface
+ * @bat_priv: the bat priv with all the soft interface information
+ */
void batadv_gw_node_free(struct batadv_priv *bat_priv)
{
struct batadv_gw_node *gw_node;
@@ -484,6 +516,14 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
}
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+
+/**
+ * batadv_gw_client_seq_print_text() - Print the gateway table in a seq file
+ * @seq: seq file to print on
+ * @offset: not used
+ *
+ * Return: always 0
+ */
int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
{
struct net_device *net_dev = (struct net_device *)seq->private;
@@ -514,7 +554,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
#endif
/**
- * batadv_gw_dump - Dump gateways into a message
+ * batadv_gw_dump() - Dump gateways into a message
* @msg: Netlink message to dump into
* @cb: Control block containing additional options
*
@@ -567,7 +607,7 @@ out:
}
/**
- * batadv_gw_dhcp_recipient_get - check if a packet is a DHCP message
+ * batadv_gw_dhcp_recipient_get() - check if a packet is a DHCP message
* @skb: the packet to check
* @header_len: a pointer to the batman-adv header size
* @chaddr: buffer where the client address will be stored. Valid
@@ -686,7 +726,8 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
}
/**
- * batadv_gw_out_of_range - check if the dhcp request destination is the best gw
+ * batadv_gw_out_of_range() - check if the dhcp request destination is the best
+ * gateway
* @bat_priv: the bat priv with all the soft interface information
* @skb: the outgoing packet
*
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 3baa3d466e5e..981f58421a32 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 2c26039c23fc..b3e156af2256 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner
@@ -26,15 +27,15 @@
#include <linux/netdevice.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <uapi/linux/batadv_packet.h>
#include "gateway_client.h"
#include "log.h"
-#include "packet.h"
#include "tvlv.h"
/**
- * batadv_parse_throughput - parse supplied string buffer to extract throughput
- * information
+ * batadv_parse_throughput() - parse supplied string buffer to extract
+ * throughput information
* @net_dev: the soft interface net device
* @buff: string buffer to parse
* @description: text shown when throughput string cannot be parsed
@@ -100,8 +101,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
}
/**
- * batadv_parse_gw_bandwidth - parse supplied string buffer to extract download
- * and upload bandwidth information
+ * batadv_parse_gw_bandwidth() - parse supplied string buffer to extract
+ * download and upload bandwidth information
* @net_dev: the soft interface net device
* @buff: string buffer to parse
* @down: pointer holding the returned download bandwidth information
@@ -136,8 +137,8 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
}
/**
- * batadv_gw_tvlv_container_update - update the gw tvlv container after gateway
- * setting change
+ * batadv_gw_tvlv_container_update() - update the gw tvlv container after
+ * gateway setting change
* @bat_priv: the bat priv with all the soft interface information
*/
void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv)
@@ -164,6 +165,15 @@ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv)
}
}
+/**
+ * batadv_gw_bandwidth_set() - Parse and set download/upload gateway bandwidth
+ * from supplied string buffer
+ * @net_dev: netdev struct of the soft interface
+ * @buff: the buffer containing the user data
+ * @count: number of bytes in the buffer
+ *
+ * Return: 'count' on success or a negative error code in case of failure
+ */
ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
size_t count)
{
@@ -207,7 +217,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
}
/**
- * batadv_gw_tvlv_ogm_handler_v1 - process incoming gateway tvlv container
+ * batadv_gw_tvlv_ogm_handler_v1() - process incoming gateway tvlv container
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
* @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
@@ -248,7 +258,7 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
}
/**
- * batadv_gw_init - initialise the gateway handling internals
+ * batadv_gw_init() - initialise the gateway handling internals
* @bat_priv: the bat priv with all the soft interface information
*/
void batadv_gw_init(struct batadv_priv *bat_priv)
@@ -264,7 +274,7 @@ void batadv_gw_init(struct batadv_priv *bat_priv)
}
/**
- * batadv_gw_free - free the gateway handling internals
+ * batadv_gw_free() - free the gateway handling internals
* @bat_priv: the bat priv with all the soft interface information
*/
void batadv_gw_free(struct batadv_priv *bat_priv)
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 0a6a97d201f2..afebd9c7edf4 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner
@@ -32,11 +33,12 @@ enum batadv_gw_modes {
/**
* enum batadv_bandwidth_units - bandwidth unit types
- * @BATADV_BW_UNIT_KBIT: unit type kbit
- * @BATADV_BW_UNIT_MBIT: unit type mbit
*/
enum batadv_bandwidth_units {
+ /** @BATADV_BW_UNIT_KBIT: unit type kbit */
BATADV_BW_UNIT_KBIT,
+
+ /** @BATADV_BW_UNIT_MBIT: unit type mbit */
BATADV_BW_UNIT_MBIT,
};
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 4e3d5340ad96..5f186bff284a 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -22,7 +23,7 @@
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
@@ -37,6 +38,7 @@
#include <linux/spinlock.h>
#include <net/net_namespace.h>
#include <net/rtnetlink.h>
+#include <uapi/linux/batadv_packet.h>
#include "bat_v.h"
#include "bridge_loop_avoidance.h"
@@ -45,14 +47,13 @@
#include "gateway_client.h"
#include "log.h"
#include "originator.h"
-#include "packet.h"
#include "send.h"
#include "soft-interface.h"
#include "sysfs.h"
#include "translation-table.h"
/**
- * batadv_hardif_release - release hard interface from lists and queue for
+ * batadv_hardif_release() - release hard interface from lists and queue for
* free after rcu grace period
* @ref: kref pointer of the hard interface
*/
@@ -66,6 +67,12 @@ void batadv_hardif_release(struct kref *ref)
kfree_rcu(hard_iface, rcu);
}
+/**
+ * batadv_hardif_get_by_netdev() - Get hard interface object of a net_device
+ * @net_dev: net_device to search for
+ *
+ * Return: batadv_hard_iface of net_dev (with increased refcnt), NULL on errors
+ */
struct batadv_hard_iface *
batadv_hardif_get_by_netdev(const struct net_device *net_dev)
{
@@ -86,7 +93,7 @@ out:
}
/**
- * batadv_getlink_net - return link net namespace (of use fallback)
+ * batadv_getlink_net() - return link net namespace (of use fallback)
* @netdev: net_device to check
* @fallback_net: return in case get_link_net is not available for @netdev
*
@@ -105,7 +112,7 @@ static struct net *batadv_getlink_net(const struct net_device *netdev,
}
/**
- * batadv_mutual_parents - check if two devices are each others parent
+ * batadv_mutual_parents() - check if two devices are each others parent
* @dev1: 1st net dev
* @net1: 1st devices netns
* @dev2: 2nd net dev
@@ -138,7 +145,7 @@ static bool batadv_mutual_parents(const struct net_device *dev1,
}
/**
- * batadv_is_on_batman_iface - check if a device is a batman iface descendant
+ * batadv_is_on_batman_iface() - check if a device is a batman iface descendant
* @net_dev: the device to check
*
* If the user creates any virtual device on top of a batman-adv interface, it
@@ -202,7 +209,7 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev)
}
/**
- * batadv_get_real_netdevice - check if the given netdev struct is a virtual
+ * batadv_get_real_netdevice() - check if the given netdev struct is a virtual
* interface on top of another 'real' interface
* @netdev: the device to check
*
@@ -246,7 +253,7 @@ out:
}
/**
- * batadv_get_real_netdev - check if the given net_device struct is a virtual
+ * batadv_get_real_netdev() - check if the given net_device struct is a virtual
* interface on top of another 'real' interface
* @net_device: the device to check
*
@@ -265,7 +272,7 @@ struct net_device *batadv_get_real_netdev(struct net_device *net_device)
}
/**
- * batadv_is_wext_netdev - check if the given net_device struct is a
+ * batadv_is_wext_netdev() - check if the given net_device struct is a
* wext wifi interface
* @net_device: the device to check
*
@@ -289,7 +296,7 @@ static bool batadv_is_wext_netdev(struct net_device *net_device)
}
/**
- * batadv_is_cfg80211_netdev - check if the given net_device struct is a
+ * batadv_is_cfg80211_netdev() - check if the given net_device struct is a
* cfg80211 wifi interface
* @net_device: the device to check
*
@@ -309,7 +316,7 @@ static bool batadv_is_cfg80211_netdev(struct net_device *net_device)
}
/**
- * batadv_wifi_flags_evaluate - calculate wifi flags for net_device
+ * batadv_wifi_flags_evaluate() - calculate wifi flags for net_device
* @net_device: the device to check
*
* Return: batadv_hard_iface_wifi_flags flags of the device
@@ -344,7 +351,7 @@ out:
}
/**
- * batadv_is_cfg80211_hardif - check if the given hardif is a cfg80211 wifi
+ * batadv_is_cfg80211_hardif() - check if the given hardif is a cfg80211 wifi
* interface
* @hard_iface: the device to check
*
@@ -362,7 +369,7 @@ bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_is_wifi_hardif - check if the given hardif is a wifi interface
+ * batadv_is_wifi_hardif() - check if the given hardif is a wifi interface
* @hard_iface: the device to check
*
* Return: true if the net device is a 802.11 wireless device, false otherwise.
@@ -376,7 +383,7 @@ bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_hardif_no_broadcast - check whether (re)broadcast is necessary
+ * batadv_hardif_no_broadcast() - check whether (re)broadcast is necessary
* @if_outgoing: the outgoing interface checked and considered for (re)broadcast
* @orig_addr: the originator of this packet
* @orig_neigh: originator address of the forwarder we just got the packet from
@@ -560,6 +567,13 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface)
soft_iface->needed_tailroom = lower_tailroom;
}
+/**
+ * batadv_hardif_min_mtu() - Calculate maximum MTU for soft interface
+ * @soft_iface: netdev struct of the soft interface
+ *
+ * Return: MTU for the soft-interface (limited by the minimal MTU of all active
+ * slave interfaces)
+ */
int batadv_hardif_min_mtu(struct net_device *soft_iface)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
@@ -606,7 +620,11 @@ out:
return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN);
}
-/* adjusts the MTU if a new interface with a smaller MTU appeared. */
+/**
+ * batadv_update_min_mtu() - Adjusts the MTU if a new interface with a smaller
+ * MTU appeared
+ * @soft_iface: netdev struct of the soft interface
+ */
void batadv_update_min_mtu(struct net_device *soft_iface)
{
soft_iface->mtu = batadv_hardif_min_mtu(soft_iface);
@@ -667,7 +685,7 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_master_del_slave - remove hard_iface from the current master interface
+ * batadv_master_del_slave() - remove hard_iface from the current master iface
* @slave: the interface enslaved in another master
* @master: the master from which slave has to be removed
*
@@ -691,6 +709,14 @@ static int batadv_master_del_slave(struct batadv_hard_iface *slave,
return ret;
}
+/**
+ * batadv_hardif_enable_interface() - Enslave hard interface to soft interface
+ * @hard_iface: hard interface to add to soft interface
+ * @net: the applicable net namespace
+ * @iface_name: name of the soft interface
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
struct net *net, const char *iface_name)
{
@@ -802,6 +828,12 @@ err:
return ret;
}
+/**
+ * batadv_hardif_disable_interface() - Remove hard interface from soft interface
+ * @hard_iface: hard interface to be removed
+ * @autodel: whether to delete soft interface when it doesn't contain any other
+ * slave interfaces
+ */
void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
enum batadv_hard_if_cleanup autodel)
{
@@ -936,6 +968,9 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
batadv_hardif_put(hard_iface);
}
+/**
+ * batadv_hardif_remove_interfaces() - Remove all hard interfaces
+ */
void batadv_hardif_remove_interfaces(void)
{
struct batadv_hard_iface *hard_iface, *hard_iface_tmp;
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 9f9890ff7a22..de5e9a374ece 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -30,36 +31,74 @@
struct net_device;
struct net;
+/**
+ * enum batadv_hard_if_state - State of a hard interface
+ */
enum batadv_hard_if_state {
+ /**
+ * @BATADV_IF_NOT_IN_USE: interface is not used as slave interface of a
+ * batman-adv soft interface
+ */
BATADV_IF_NOT_IN_USE,
+
+ /**
+ * @BATADV_IF_TO_BE_REMOVED: interface will be removed from soft
+ * interface
+ */
BATADV_IF_TO_BE_REMOVED,
+
+ /** @BATADV_IF_INACTIVE: interface is deactivated */
BATADV_IF_INACTIVE,
+
+ /** @BATADV_IF_ACTIVE: interface is used */
BATADV_IF_ACTIVE,
+
+ /** @BATADV_IF_TO_BE_ACTIVATED: interface is getting activated */
BATADV_IF_TO_BE_ACTIVATED,
+
+ /**
+ * @BATADV_IF_I_WANT_YOU: interface is queued up (using sysfs) for being
+ * added as slave interface of a batman-adv soft interface
+ */
BATADV_IF_I_WANT_YOU,
};
/**
* enum batadv_hard_if_bcast - broadcast avoidance options
- * @BATADV_HARDIF_BCAST_OK: Do broadcast on according hard interface
- * @BATADV_HARDIF_BCAST_NORECIPIENT: Broadcast not needed, there is no recipient
- * @BATADV_HARDIF_BCAST_DUPFWD: There is just the neighbor we got it from
- * @BATADV_HARDIF_BCAST_DUPORIG: There is just the originator
*/
enum batadv_hard_if_bcast {
+ /** @BATADV_HARDIF_BCAST_OK: Do broadcast on according hard interface */
BATADV_HARDIF_BCAST_OK = 0,
+
+ /**
+ * @BATADV_HARDIF_BCAST_NORECIPIENT: Broadcast not needed, there is no
+ * recipient
+ */
BATADV_HARDIF_BCAST_NORECIPIENT,
+
+ /**
+ * @BATADV_HARDIF_BCAST_DUPFWD: There is just the neighbor we got it
+ * from
+ */
BATADV_HARDIF_BCAST_DUPFWD,
+
+ /** @BATADV_HARDIF_BCAST_DUPORIG: There is just the originator */
BATADV_HARDIF_BCAST_DUPORIG,
};
/**
* enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal
- * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface
- * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed
*/
enum batadv_hard_if_cleanup {
+ /**
+ * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface
+ */
BATADV_IF_CLEANUP_KEEP,
+
+ /**
+ * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was
+ * removed
+ */
BATADV_IF_CLEANUP_AUTO,
};
@@ -82,7 +121,7 @@ int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
u8 *orig_addr, u8 *orig_neigh);
/**
- * batadv_hardif_put - decrement the hard interface refcounter and possibly
+ * batadv_hardif_put() - decrement the hard interface refcounter and possibly
* release it
* @hard_iface: the hard interface to free
*/
@@ -91,6 +130,12 @@ static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface)
kref_put(&hard_iface->refcount, batadv_hardif_release);
}
+/**
+ * batadv_primary_if_get_selected() - Get reference to primary interface
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: primary interface (with increased refcnt), otherwise NULL
+ */
static inline struct batadv_hard_iface *
batadv_primary_if_get_selected(struct batadv_priv *bat_priv)
{
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index b5f7e13918ac..04d964358c98 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
@@ -18,7 +19,7 @@
#include "hash.h"
#include "main.h"
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
@@ -33,7 +34,10 @@ static void batadv_hash_init(struct batadv_hashtable *hash)
}
}
-/* free only the hashtable and the hash itself. */
+/**
+ * batadv_hash_destroy() - Free only the hashtable and the hash itself
+ * @hash: hash object to destroy
+ */
void batadv_hash_destroy(struct batadv_hashtable *hash)
{
kfree(hash->list_locks);
@@ -41,7 +45,12 @@ void batadv_hash_destroy(struct batadv_hashtable *hash)
kfree(hash);
}
-/* allocates and clears the hash */
+/**
+ * batadv_hash_new() - Allocates and clears the hashtable
+ * @size: number of hash buckets to allocate
+ *
+ * Return: newly allocated hashtable, NULL on errors
+ */
struct batadv_hashtable *batadv_hash_new(u32 size)
{
struct batadv_hashtable *hash;
@@ -70,6 +79,11 @@ free_hash:
return NULL;
}
+/**
+ * batadv_hash_set_lock_class() - Set specific lockdep class for hash spinlocks
+ * @hash: hash object to modify
+ * @key: lockdep class key address
+ */
void batadv_hash_set_lock_class(struct batadv_hashtable *hash,
struct lock_class_key *key)
{
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 0c905e91c5e2..4ce1b6d3ad5c 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
@@ -45,10 +46,18 @@ typedef bool (*batadv_hashdata_compare_cb)(const struct hlist_node *,
typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32);
typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *);
+/**
+ * struct batadv_hashtable - Wrapper of simple hlist based hashtable
+ */
struct batadv_hashtable {
- struct hlist_head *table; /* the hashtable itself with the buckets */
- spinlock_t *list_locks; /* spinlock for each hash list entry */
- u32 size; /* size of hashtable */
+ /** @table: the hashtable itself with the buckets */
+ struct hlist_head *table;
+
+ /** @list_locks: spinlock for each hash list entry */
+ spinlock_t *list_locks;
+
+ /** @size: size of hashtable */
+ u32 size;
};
/* allocates and clears the hash */
@@ -62,7 +71,7 @@ void batadv_hash_set_lock_class(struct batadv_hashtable *hash,
void batadv_hash_destroy(struct batadv_hashtable *hash);
/**
- * batadv_hash_add - adds data to the hashtable
+ * batadv_hash_add() - adds data to the hashtable
* @hash: storage hash table
* @compare: callback to determine if 2 hash elements are identical
* @choose: callback calculating the hash index
@@ -112,8 +121,15 @@ out:
return ret;
}
-/* removes data from hash, if found. data could be the structure you use with
- * just the key filled, we just need the key for comparing.
+/**
+ * batadv_hash_remove() - Removes data from hash, if found
+ * @hash: hash table
+ * @compare: callback to determine if 2 hash elements are identical
+ * @choose: callback calculating the hash index
+ * @data: data passed to the aforementioned callbacks as argument
+ *
+ * ata could be the structure you use with just the key filled, we just need
+ * the key for comparing.
*
* Return: returns pointer do data on success, so you can remove the used
* structure yourself, or NULL on error
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index bded31121d12..581375d0eed2 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner
@@ -26,6 +27,7 @@
#include <linux/export.h>
#include <linux/fcntl.h>
#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/kernel.h>
#include <linux/list.h>
@@ -42,11 +44,11 @@
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/wait.h>
+#include <uapi/linux/batadv_packet.h>
#include "hard-interface.h"
#include "log.h"
#include "originator.h"
-#include "packet.h"
#include "send.h"
static struct batadv_socket_client *batadv_socket_client_hash[256];
@@ -55,6 +57,9 @@ static void batadv_socket_add_packet(struct batadv_socket_client *socket_client,
struct batadv_icmp_header *icmph,
size_t icmp_len);
+/**
+ * batadv_socket_init() - Initialize soft interface independent socket data
+ */
void batadv_socket_init(void)
{
memset(batadv_socket_client_hash, 0, sizeof(batadv_socket_client_hash));
@@ -292,7 +297,7 @@ out:
return len;
}
-static unsigned int batadv_socket_poll(struct file *file, poll_table *wait)
+static __poll_t batadv_socket_poll(struct file *file, poll_table *wait)
{
struct batadv_socket_client *socket_client = file->private_data;
@@ -314,6 +319,12 @@ static const struct file_operations batadv_fops = {
.llseek = no_llseek,
};
+/**
+ * batadv_socket_setup() - Create debugfs "socket" file
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_socket_setup(struct batadv_priv *bat_priv)
{
struct dentry *d;
@@ -333,7 +344,7 @@ err:
}
/**
- * batadv_socket_add_packet - schedule an icmp packet to be sent to
+ * batadv_socket_add_packet() - schedule an icmp packet to be sent to
* userspace on an icmp socket.
* @socket_client: the socket this packet belongs to
* @icmph: pointer to the header of the icmp packet
@@ -390,7 +401,7 @@ static void batadv_socket_add_packet(struct batadv_socket_client *socket_client,
}
/**
- * batadv_socket_receive_packet - schedule an icmp packet to be received
+ * batadv_socket_receive_packet() - schedule an icmp packet to be received
* locally and sent to userspace.
* @icmph: pointer to the header of the icmp packet
* @icmp_len: total length of the icmp packet
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index f3fec40aae86..84cddd01eeab 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 4ef4bde2cc2d..9be74a44e99d 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner
@@ -24,6 +25,7 @@
#include <linux/export.h>
#include <linux/fcntl.h>
#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -86,6 +88,13 @@ static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log,
return 0;
}
+/**
+ * batadv_debug_log() - Add debug log entry
+ * @bat_priv: the bat priv with all the soft interface information
+ * @fmt: format string
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
{
va_list args;
@@ -176,7 +185,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf,
return error;
}
-static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
+static __poll_t batadv_log_poll(struct file *file, poll_table *wait)
{
struct batadv_priv *bat_priv = file->private_data;
struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
@@ -197,6 +206,12 @@ static const struct file_operations batadv_log_fops = {
.llseek = no_llseek,
};
+/**
+ * batadv_debug_log_setup() - Initialize debug log
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_debug_log_setup(struct batadv_priv *bat_priv)
{
struct dentry *d;
@@ -222,6 +237,10 @@ err:
return -ENOMEM;
}
+/**
+ * batadv_debug_log_cleanup() - Destroy debug log
+ * @bat_priv: the bat priv with all the soft interface information
+ */
void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
{
kfree(bat_priv->debug_log);
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 65ce97efa6b5..35e02b2b9e72 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -44,25 +45,33 @@ static inline void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
/**
* enum batadv_dbg_level - available log levels
- * @BATADV_DBG_BATMAN: OGM and TQ computations related messages
- * @BATADV_DBG_ROUTES: route added / changed / deleted
- * @BATADV_DBG_TT: translation table messages
- * @BATADV_DBG_BLA: bridge loop avoidance messages
- * @BATADV_DBG_DAT: ARP snooping and DAT related messages
- * @BATADV_DBG_NC: network coding related messages
- * @BATADV_DBG_MCAST: multicast related messages
- * @BATADV_DBG_TP_METER: throughput meter messages
- * @BATADV_DBG_ALL: the union of all the above log levels
*/
enum batadv_dbg_level {
+ /** @BATADV_DBG_BATMAN: OGM and TQ computations related messages */
BATADV_DBG_BATMAN = BIT(0),
+
+ /** @BATADV_DBG_ROUTES: route added / changed / deleted */
BATADV_DBG_ROUTES = BIT(1),
+
+ /** @BATADV_DBG_TT: translation table messages */
BATADV_DBG_TT = BIT(2),
+
+ /** @BATADV_DBG_BLA: bridge loop avoidance messages */
BATADV_DBG_BLA = BIT(3),
+
+ /** @BATADV_DBG_DAT: ARP snooping and DAT related messages */
BATADV_DBG_DAT = BIT(4),
+
+ /** @BATADV_DBG_NC: network coding related messages */
BATADV_DBG_NC = BIT(5),
+
+ /** @BATADV_DBG_MCAST: multicast related messages */
BATADV_DBG_MCAST = BIT(6),
+
+ /** @BATADV_DBG_TP_METER: throughput meter messages */
BATADV_DBG_TP_METER = BIT(7),
+
+ /** @BATADV_DBG_ALL: the union of all the above log levels */
BATADV_DBG_ALL = 255,
};
@@ -70,7 +79,14 @@ enum batadv_dbg_level {
int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
__printf(2, 3);
-/* possibly ratelimited debug output */
+/**
+ * _batadv_dbg() - Store debug output with(out) ratelimiting
+ * @type: type of debug message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ratelimited: whether output should be rate limited
+ * @fmt: format string
+ * @arg...: variable arguments
+ */
#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
do { \
struct batadv_priv *__batpriv = (bat_priv); \
@@ -89,11 +105,30 @@ static inline void _batadv_dbg(int type __always_unused,
}
#endif
+/**
+ * batadv_dbg() - Store debug output without ratelimiting
+ * @type: type of debug message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @arg...: format string and variable arguments
+ */
#define batadv_dbg(type, bat_priv, arg...) \
_batadv_dbg(type, bat_priv, 0, ## arg)
+
+/**
+ * batadv_dbg_ratelimited() - Store debug output with ratelimiting
+ * @type: type of debug message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @arg...: format string and variable arguments
+ */
#define batadv_dbg_ratelimited(type, bat_priv, arg...) \
_batadv_dbg(type, bat_priv, 1, ## arg)
+/**
+ * batadv_info() - Store message in debug buffer and print it to kmsg buffer
+ * @net_dev: the soft interface net device
+ * @fmt: format string
+ * @arg...: variable arguments
+ */
#define batadv_info(net_dev, fmt, arg...) \
do { \
struct net_device *_netdev = (net_dev); \
@@ -101,6 +136,13 @@ static inline void _batadv_dbg(int type __always_unused,
batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \
pr_info("%s: " fmt, _netdev->name, ## arg); \
} while (0)
+
+/**
+ * batadv_err() - Store error in debug buffer and print it to kmsg buffer
+ * @net_dev: the soft interface net device
+ * @fmt: format string
+ * @arg...: variable arguments
+ */
#define batadv_err(net_dev, fmt, arg...) \
do { \
struct net_device *_netdev = (net_dev); \
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 4daed7ad46f2..d31c8266e244 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -18,12 +19,12 @@
#include "main.h"
#include <linux/atomic.h>
-#include <linux/bug.h>
+#include <linux/build_bug.h>
#include <linux/byteorder/generic.h>
#include <linux/crc32c.h>
#include <linux/errno.h>
-#include <linux/fs.h>
#include <linux/genetlink.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
@@ -45,6 +46,7 @@
#include <linux/workqueue.h>
#include <net/dsfield.h>
#include <net/rtnetlink.h>
+#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
#include "bat_algo.h"
@@ -62,7 +64,6 @@
#include "netlink.h"
#include "network-coding.h"
#include "originator.h"
-#include "packet.h"
#include "routing.h"
#include "send.h"
#include "soft-interface.h"
@@ -139,6 +140,12 @@ static void __exit batadv_exit(void)
batadv_tt_cache_destroy();
}
+/**
+ * batadv_mesh_init() - Initialize soft interface
+ * @soft_iface: netdev struct of the soft interface
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_mesh_init(struct net_device *soft_iface)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
@@ -216,6 +223,10 @@ err:
return ret;
}
+/**
+ * batadv_mesh_free() - Deinitialize soft interface
+ * @soft_iface: netdev struct of the soft interface
+ */
void batadv_mesh_free(struct net_device *soft_iface)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
@@ -255,8 +266,8 @@ void batadv_mesh_free(struct net_device *soft_iface)
}
/**
- * batadv_is_my_mac - check if the given mac address belongs to any of the real
- * interfaces in the current mesh
+ * batadv_is_my_mac() - check if the given mac address belongs to any of the
+ * real interfaces in the current mesh
* @bat_priv: the bat priv with all the soft interface information
* @addr: the address to check
*
@@ -286,7 +297,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr)
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
- * batadv_seq_print_text_primary_if_get - called from debugfs table printing
+ * batadv_seq_print_text_primary_if_get() - called from debugfs table printing
* function that requires the primary interface
* @seq: debugfs table seq_file struct
*
@@ -323,7 +334,7 @@ out:
#endif
/**
- * batadv_max_header_len - calculate maximum encapsulation overhead for a
+ * batadv_max_header_len() - calculate maximum encapsulation overhead for a
* payload packet
*
* Return: the maximum encapsulation overhead in bytes.
@@ -348,7 +359,7 @@ int batadv_max_header_len(void)
}
/**
- * batadv_skb_set_priority - sets skb priority according to packet content
+ * batadv_skb_set_priority() - sets skb priority according to packet content
* @skb: the packet to be sent
* @offset: offset to the packet content
*
@@ -412,6 +423,16 @@ static int batadv_recv_unhandled_packet(struct sk_buff *skb,
/* incoming packets with the batman ethertype received on any active hard
* interface
*/
+
+/**
+ * batadv_batman_skb_recv() - Handle incoming message from an hard interface
+ * @skb: the received packet
+ * @dev: the net device that the packet was received on
+ * @ptype: packet type of incoming packet (ETH_P_BATMAN)
+ * @orig_dev: the original receive net device (e.g. bonded device)
+ *
+ * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure
+ */
int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *ptype,
struct net_device *orig_dev)
@@ -535,6 +556,13 @@ static void batadv_recv_handler_init(void)
batadv_rx_handler[BATADV_UNICAST_FRAG] = batadv_recv_frag_packet;
}
+/**
+ * batadv_recv_handler_register() - Register handler for batman-adv packet type
+ * @packet_type: batadv_packettype which should be handled
+ * @recv_handler: receive handler for the packet type
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int
batadv_recv_handler_register(u8 packet_type,
int (*recv_handler)(struct sk_buff *,
@@ -552,13 +580,17 @@ batadv_recv_handler_register(u8 packet_type,
return 0;
}
+/**
+ * batadv_recv_handler_unregister() - Unregister handler for packet type
+ * @packet_type: batadv_packettype which should no longer be handled
+ */
void batadv_recv_handler_unregister(u8 packet_type)
{
batadv_rx_handler[packet_type] = batadv_recv_unhandled_packet;
}
/**
- * batadv_skb_crc32 - calculate CRC32 of the whole packet and skip bytes in
+ * batadv_skb_crc32() - calculate CRC32 of the whole packet and skip bytes in
* the header
* @skb: skb pointing to fragmented socket buffers
* @payload_ptr: Pointer to position inside the head buffer of the skb
@@ -591,7 +623,7 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
}
/**
- * batadv_get_vid - extract the VLAN identifier from skb if any
+ * batadv_get_vid() - extract the VLAN identifier from skb if any
* @skb: the buffer containing the packet
* @header_len: length of the batman header preceding the ethernet header
*
@@ -618,7 +650,7 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len)
}
/**
- * batadv_vlan_ap_isola_get - return the AP isolation status for the given vlan
+ * batadv_vlan_ap_isola_get() - return AP isolation status for the given vlan
* @bat_priv: the bat priv with all the soft interface information
* @vid: the VLAN identifier for which the AP isolation attributed as to be
* looked up
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index edb2f239d04d..f7ba3f96d8f3 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -24,7 +25,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.4"
+#define BATADV_SOURCE_VERSION "2018.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -140,24 +141,56 @@
*/
#define BATADV_TP_MAX_NUM 5
+/**
+ * enum batadv_mesh_state - State of a soft interface
+ */
enum batadv_mesh_state {
+ /** @BATADV_MESH_INACTIVE: soft interface is not yet running */
BATADV_MESH_INACTIVE,
+
+ /** @BATADV_MESH_ACTIVE: interface is up and running */
BATADV_MESH_ACTIVE,
+
+ /** @BATADV_MESH_DEACTIVATING: interface is getting shut down */
BATADV_MESH_DEACTIVATING,
};
#define BATADV_BCAST_QUEUE_LEN 256
#define BATADV_BATMAN_QUEUE_LEN 256
+/**
+ * enum batadv_uev_action - action type of uevent
+ */
enum batadv_uev_action {
+ /** @BATADV_UEV_ADD: gateway was selected (after none was selected) */
BATADV_UEV_ADD = 0,
+
+ /**
+ * @BATADV_UEV_DEL: selected gateway was removed and none is selected
+ * anymore
+ */
BATADV_UEV_DEL,
+
+ /**
+ * @BATADV_UEV_CHANGE: a different gateway was selected as based gateway
+ */
BATADV_UEV_CHANGE,
+
+ /**
+ * @BATADV_UEV_LOOPDETECT: loop was detected which cannot be handled by
+ * bridge loop avoidance
+ */
BATADV_UEV_LOOPDETECT,
};
+/**
+ * enum batadv_uev_type - Type of uevent
+ */
enum batadv_uev_type {
+ /** @BATADV_UEV_GW: selected gateway was modified */
BATADV_UEV_GW = 0,
+
+ /** @BATADV_UEV_BLA: bridge loop avoidance event */
BATADV_UEV_BLA,
};
@@ -184,16 +217,14 @@ enum batadv_uev_type {
/* Kernel headers */
-#include <linux/bitops.h> /* for packet.h */
#include <linux/compiler.h>
#include <linux/etherdevice.h>
-#include <linux/if_ether.h> /* for packet.h */
#include <linux/if_vlan.h>
#include <linux/jiffies.h>
#include <linux/percpu.h>
#include <linux/types.h>
+#include <uapi/linux/batadv_packet.h>
-#include "packet.h"
#include "types.h"
struct net_device;
@@ -202,7 +233,7 @@ struct seq_file;
struct sk_buff;
/**
- * batadv_print_vid - return printable version of vid information
+ * batadv_print_vid() - return printable version of vid information
* @vid: the VLAN identifier
*
* Return: -1 when no VLAN is used, VLAN id otherwise
@@ -238,7 +269,7 @@ void batadv_recv_handler_unregister(u8 packet_type);
__be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr);
/**
- * batadv_compare_eth - Compare two not u16 aligned Ethernet addresses
+ * batadv_compare_eth() - Compare two not u16 aligned Ethernet addresses
* @data1: Pointer to a six-byte array containing the Ethernet address
* @data2: Pointer other six-byte array containing the Ethernet address
*
@@ -252,7 +283,7 @@ static inline bool batadv_compare_eth(const void *data1, const void *data2)
}
/**
- * batadv_has_timed_out - compares current time (jiffies) and timestamp +
+ * batadv_has_timed_out() - compares current time (jiffies) and timestamp +
* timeout
* @timestamp: base value to compare with (in jiffies)
* @timeout: added to base value before comparing (in milliseconds)
@@ -265,40 +296,96 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
return time_is_before_jiffies(timestamp + msecs_to_jiffies(timeout));
}
+/**
+ * batadv_atomic_dec_not_zero() - Decrease unless the number is 0
+ * @v: pointer of type atomic_t
+ *
+ * Return: non-zero if v was not 0, and zero otherwise.
+ */
#define batadv_atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0)
-/* Returns the smallest signed integer in two's complement with the sizeof x */
+/**
+ * batadv_smallest_signed_int() - Returns the smallest signed integer in two's
+ * complement with the sizeof x
+ * @x: type of integer
+ *
+ * Return: smallest signed integer of type
+ */
#define batadv_smallest_signed_int(x) (1u << (7u + 8u * (sizeof(x) - 1u)))
-/* Checks if a sequence number x is a predecessor/successor of y.
- * they handle overflows/underflows and can correctly check for a
- * predecessor/successor unless the variable sequence number has grown by
- * more then 2**(bitwidth(x)-1)-1.
+/**
+ * batadv_seq_before() - Checks if a sequence number x is a predecessor of y
+ * @x: potential predecessor of @y
+ * @y: value to compare @x against
+ *
+ * It handles overflows/underflows and can correctly check for a predecessor
+ * unless the variable sequence number has grown by more then
+ * 2**(bitwidth(x)-1)-1.
+ *
* This means that for a u8 with the maximum value 255, it would think:
- * - when adding nothing - it is neither a predecessor nor a successor
- * - before adding more than 127 to the starting value - it is a predecessor,
- * - when adding 128 - it is neither a predecessor nor a successor,
- * - after adding more than 127 to the starting value - it is a successor
+ *
+ * * when adding nothing - it is neither a predecessor nor a successor
+ * * before adding more than 127 to the starting value - it is a predecessor,
+ * * when adding 128 - it is neither a predecessor nor a successor,
+ * * after adding more than 127 to the starting value - it is a successor
+ *
+ * Return: true when x is a predecessor of y, false otherwise
*/
#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \
typeof(y)_d2 = (y); \
typeof(x)_dummy = (_d1 - _d2); \
(void)(&_d1 == &_d2); \
_dummy > batadv_smallest_signed_int(_dummy); })
+
+/**
+ * batadv_seq_after() - Checks if a sequence number x is a successor of y
+ * @x: potential sucessor of @y
+ * @y: value to compare @x against
+ *
+ * It handles overflows/underflows and can correctly check for a successor
+ * unless the variable sequence number has grown by more then
+ * 2**(bitwidth(x)-1)-1.
+ *
+ * This means that for a u8 with the maximum value 255, it would think:
+ *
+ * * when adding nothing - it is neither a predecessor nor a successor
+ * * before adding more than 127 to the starting value - it is a predecessor,
+ * * when adding 128 - it is neither a predecessor nor a successor,
+ * * after adding more than 127 to the starting value - it is a successor
+ *
+ * Return: true when x is a successor of y, false otherwise
+ */
#define batadv_seq_after(x, y) batadv_seq_before(y, x)
-/* Stop preemption on local cpu while incrementing the counter */
+/**
+ * batadv_add_counter() - Add to per cpu statistics counter of soft interface
+ * @bat_priv: the bat priv with all the soft interface information
+ * @idx: counter index which should be modified
+ * @count: value to increase counter by
+ *
+ * Stop preemption on local cpu while incrementing the counter
+ */
static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx,
size_t count)
{
this_cpu_add(bat_priv->bat_counters[idx], count);
}
+/**
+ * batadv_inc_counter() - Increase per cpu statistics counter of soft interface
+ * @b: the bat priv with all the soft interface information
+ * @i: counter index which should be modified
+ */
#define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1)
-/* Define a macro to reach the control buffer of the skb. The members of the
- * control buffer are defined in struct batadv_skb_cb in types.h.
- * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h.
+/**
+ * BATADV_SKB_CB() - Get batadv_skb_cb from skb control buffer
+ * @__skb: skb holding the control buffer
+ *
+ * The members of the control buffer are defined in struct batadv_skb_cb in
+ * types.h. The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h.
+ *
+ * Return: pointer to the batadv_skb_cb of the skb
*/
#define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0]))
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index e553a8770a89..cbdeb47ec3f6 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
@@ -24,7 +25,7 @@
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/icmpv6.h>
#include <linux/if_bridge.h>
#include <linux/if_ether.h>
@@ -54,18 +55,18 @@
#include <net/if_inet6.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <uapi/linux/batadv_packet.h>
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
-#include "packet.h"
#include "translation-table.h"
#include "tvlv.h"
static void batadv_mcast_mla_update(struct work_struct *work);
/**
- * batadv_mcast_start_timer - schedule the multicast periodic worker
+ * batadv_mcast_start_timer() - schedule the multicast periodic worker
* @bat_priv: the bat priv with all the soft interface information
*/
static void batadv_mcast_start_timer(struct batadv_priv *bat_priv)
@@ -75,7 +76,7 @@ static void batadv_mcast_start_timer(struct batadv_priv *bat_priv)
}
/**
- * batadv_mcast_get_bridge - get the bridge on top of the softif if it exists
+ * batadv_mcast_get_bridge() - get the bridge on top of the softif if it exists
* @soft_iface: netdev struct of the mesh interface
*
* If the given soft interface has a bridge on top then the refcount
@@ -101,7 +102,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
}
/**
- * batadv_mcast_mla_softif_get - get softif multicast listeners
+ * batadv_mcast_mla_softif_get() - get softif multicast listeners
* @dev: the device to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
@@ -147,7 +148,7 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
}
/**
- * batadv_mcast_mla_is_duplicate - check whether an address is in a list
+ * batadv_mcast_mla_is_duplicate() - check whether an address is in a list
* @mcast_addr: the multicast address to check
* @mcast_list: the list with multicast addresses to search in
*
@@ -167,7 +168,7 @@ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr,
}
/**
- * batadv_mcast_mla_br_addr_cpy - copy a bridge multicast address
+ * batadv_mcast_mla_br_addr_cpy() - copy a bridge multicast address
* @dst: destination to write to - a multicast MAC address
* @src: source to read from - a multicast IP address
*
@@ -191,7 +192,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
}
/**
- * batadv_mcast_mla_bridge_get - get bridged-in multicast listeners
+ * batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
* @dev: a bridge slave whose bridge to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
@@ -244,7 +245,7 @@ out:
}
/**
- * batadv_mcast_mla_list_free - free a list of multicast addresses
+ * batadv_mcast_mla_list_free() - free a list of multicast addresses
* @mcast_list: the list to free
*
* Removes and frees all items in the given mcast_list.
@@ -261,7 +262,7 @@ static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list)
}
/**
- * batadv_mcast_mla_tt_retract - clean up multicast listener announcements
+ * batadv_mcast_mla_tt_retract() - clean up multicast listener announcements
* @bat_priv: the bat priv with all the soft interface information
* @mcast_list: a list of addresses which should _not_ be removed
*
@@ -297,7 +298,7 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_mla_tt_add - add multicast listener announcements
+ * batadv_mcast_mla_tt_add() - add multicast listener announcements
* @bat_priv: the bat priv with all the soft interface information
* @mcast_list: a list of addresses which are going to get added
*
@@ -333,7 +334,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_has_bridge - check whether the soft-iface is bridged
+ * batadv_mcast_has_bridge() - check whether the soft-iface is bridged
* @bat_priv: the bat priv with all the soft interface information
*
* Checks whether there is a bridge on top of our soft interface.
@@ -354,7 +355,8 @@ static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
}
/**
- * batadv_mcast_querier_log - debug output regarding the querier status on link
+ * batadv_mcast_querier_log() - debug output regarding the querier status on
+ * link
* @bat_priv: the bat priv with all the soft interface information
* @str_proto: a string for the querier protocol (e.g. "IGMP" or "MLD")
* @old_state: the previous querier state on our link
@@ -405,7 +407,8 @@ batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto,
}
/**
- * batadv_mcast_bridge_log - debug output for topology changes in bridged setups
+ * batadv_mcast_bridge_log() - debug output for topology changes in bridged
+ * setups
* @bat_priv: the bat priv with all the soft interface information
* @bridged: a flag about whether the soft interface is currently bridged or not
* @querier_ipv4: (maybe) new status of a potential, selected IGMP querier
@@ -444,7 +447,7 @@ batadv_mcast_bridge_log(struct batadv_priv *bat_priv, bool bridged,
}
/**
- * batadv_mcast_flags_logs - output debug information about mcast flag changes
+ * batadv_mcast_flags_logs() - output debug information about mcast flag changes
* @bat_priv: the bat priv with all the soft interface information
* @flags: flags indicating the new multicast state
*
@@ -470,7 +473,7 @@ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags)
}
/**
- * batadv_mcast_mla_tvlv_update - update multicast tvlv
+ * batadv_mcast_mla_tvlv_update() - update multicast tvlv
* @bat_priv: the bat priv with all the soft interface information
*
* Updates the own multicast tvlv with our current multicast related settings,
@@ -545,7 +548,7 @@ update:
}
/**
- * __batadv_mcast_mla_update - update the own MLAs
+ * __batadv_mcast_mla_update() - update the own MLAs
* @bat_priv: the bat priv with all the soft interface information
*
* Updates the own multicast listener announcements in the translation
@@ -582,7 +585,7 @@ out:
}
/**
- * batadv_mcast_mla_update - update the own MLAs
+ * batadv_mcast_mla_update() - update the own MLAs
* @work: kernel work struct
*
* Updates the own multicast listener announcements in the translation
@@ -605,7 +608,7 @@ static void batadv_mcast_mla_update(struct work_struct *work)
}
/**
- * batadv_mcast_is_report_ipv4 - check for IGMP reports
+ * batadv_mcast_is_report_ipv4() - check for IGMP reports
* @skb: the ethernet frame destined for the mesh
*
* This call might reallocate skb data.
@@ -630,7 +633,8 @@ static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb)
}
/**
- * batadv_mcast_forw_mode_check_ipv4 - check for optimized forwarding potential
+ * batadv_mcast_forw_mode_check_ipv4() - check for optimized forwarding
+ * potential
* @bat_priv: the bat priv with all the soft interface information
* @skb: the IPv4 packet to check
* @is_unsnoopable: stores whether the destination is snoopable
@@ -671,7 +675,7 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_is_report_ipv6 - check for MLD reports
+ * batadv_mcast_is_report_ipv6() - check for MLD reports
* @skb: the ethernet frame destined for the mesh
*
* This call might reallocate skb data.
@@ -695,7 +699,8 @@ static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb)
}
/**
- * batadv_mcast_forw_mode_check_ipv6 - check for optimized forwarding potential
+ * batadv_mcast_forw_mode_check_ipv6() - check for optimized forwarding
+ * potential
* @bat_priv: the bat priv with all the soft interface information
* @skb: the IPv6 packet to check
* @is_unsnoopable: stores whether the destination is snoopable
@@ -736,7 +741,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_forw_mode_check - check for optimized forwarding potential
+ * batadv_mcast_forw_mode_check() - check for optimized forwarding potential
* @bat_priv: the bat priv with all the soft interface information
* @skb: the multicast frame to check
* @is_unsnoopable: stores whether the destination is snoopable
@@ -774,7 +779,7 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_forw_want_all_ip_count - count nodes with unspecific mcast
+ * batadv_mcast_forw_want_all_ip_count() - count nodes with unspecific mcast
* interest
* @bat_priv: the bat priv with all the soft interface information
* @ethhdr: ethernet header of a packet
@@ -798,7 +803,7 @@ static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_forw_tt_node_get - get a multicast tt node
+ * batadv_mcast_forw_tt_node_get() - get a multicast tt node
* @bat_priv: the bat priv with all the soft interface information
* @ethhdr: the ether header containing the multicast destination
*
@@ -814,7 +819,7 @@ batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_forw_ipv4_node_get - get a node with an ipv4 flag
+ * batadv_mcast_forw_ipv4_node_get() - get a node with an ipv4 flag
* @bat_priv: the bat priv with all the soft interface information
*
* Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and
@@ -841,7 +846,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
}
/**
- * batadv_mcast_forw_ipv6_node_get - get a node with an ipv6 flag
+ * batadv_mcast_forw_ipv6_node_get() - get a node with an ipv6 flag
* @bat_priv: the bat priv with all the soft interface information
*
* Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set
@@ -868,7 +873,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
}
/**
- * batadv_mcast_forw_ip_node_get - get a node with an ipv4/ipv6 flag
+ * batadv_mcast_forw_ip_node_get() - get a node with an ipv4/ipv6 flag
* @bat_priv: the bat priv with all the soft interface information
* @ethhdr: an ethernet header to determine the protocol family from
*
@@ -892,7 +897,7 @@ batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_forw_unsnoop_node_get - get a node with an unsnoopable flag
+ * batadv_mcast_forw_unsnoop_node_get() - get a node with an unsnoopable flag
* @bat_priv: the bat priv with all the soft interface information
*
* Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag
@@ -919,7 +924,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
}
/**
- * batadv_mcast_forw_mode - check on how to forward a multicast packet
+ * batadv_mcast_forw_mode() - check on how to forward a multicast packet
* @bat_priv: the bat priv with all the soft interface information
* @skb: The multicast packet to check
* @orig: an originator to be set to forward the skb to
@@ -973,7 +978,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
}
/**
- * batadv_mcast_want_unsnoop_update - update unsnoop counter and list
+ * batadv_mcast_want_unsnoop_update() - update unsnoop counter and list
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node which multicast state might have changed of
* @mcast_flags: flags indicating the new multicast state
@@ -1018,7 +1023,7 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_want_ipv4_update - update want-all-ipv4 counter and list
+ * batadv_mcast_want_ipv4_update() - update want-all-ipv4 counter and list
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node which multicast state might have changed of
* @mcast_flags: flags indicating the new multicast state
@@ -1063,7 +1068,7 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_want_ipv6_update - update want-all-ipv6 counter and list
+ * batadv_mcast_want_ipv6_update() - update want-all-ipv6 counter and list
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node which multicast state might have changed of
* @mcast_flags: flags indicating the new multicast state
@@ -1108,7 +1113,7 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_tvlv_ogm_handler - process incoming multicast tvlv container
+ * batadv_mcast_tvlv_ogm_handler() - process incoming multicast tvlv container
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
* @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
@@ -1164,7 +1169,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_init - initialize the multicast optimizations structures
+ * batadv_mcast_init() - initialize the multicast optimizations structures
* @bat_priv: the bat priv with all the soft interface information
*/
void batadv_mcast_init(struct batadv_priv *bat_priv)
@@ -1179,7 +1184,7 @@ void batadv_mcast_init(struct batadv_priv *bat_priv)
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
- * batadv_mcast_flags_print_header - print own mcast flags to debugfs table
+ * batadv_mcast_flags_print_header() - print own mcast flags to debugfs table
* @bat_priv: the bat priv with all the soft interface information
* @seq: debugfs table seq_file struct
*
@@ -1220,7 +1225,7 @@ static void batadv_mcast_flags_print_header(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_flags_seq_print_text - print the mcast flags of other nodes
+ * batadv_mcast_flags_seq_print_text() - print the mcast flags of other nodes
* @seq: seq file to print on
* @offset: not used
*
@@ -1281,7 +1286,7 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
#endif
/**
- * batadv_mcast_free - free the multicast optimizations structures
+ * batadv_mcast_free() - free the multicast optimizations structures
* @bat_priv: the bat priv with all the soft interface information
*/
void batadv_mcast_free(struct batadv_priv *bat_priv)
@@ -1296,7 +1301,7 @@ void batadv_mcast_free(struct batadv_priv *bat_priv)
}
/**
- * batadv_mcast_purge_orig - reset originator global mcast state modifications
+ * batadv_mcast_purge_orig() - reset originator global mcast state modifications
* @orig: the originator which is going to get purged
*/
void batadv_mcast_purge_orig(struct batadv_orig_node *orig)
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 2a78cddab0e9..3ac06337ab71 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
@@ -25,15 +26,21 @@ struct sk_buff;
/**
* enum batadv_forw_mode - the way a packet should be forwarded as
- * @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic
- * flooding)
- * @BATADV_FORW_SINGLE: forward the packet to a single node (currently via the
- * BATMAN unicast routing protocol)
- * @BATADV_FORW_NONE: don't forward, drop it
*/
enum batadv_forw_mode {
+ /**
+ * @BATADV_FORW_ALL: forward the packet to all nodes (currently via
+ * classic flooding)
+ */
BATADV_FORW_ALL,
+
+ /**
+ * @BATADV_FORW_SINGLE: forward the packet to a single node (currently
+ * via the BATMAN unicast routing protocol)
+ */
BATADV_FORW_SINGLE,
+
+ /** @BATADV_FORW_NONE: don't forward, drop it */
BATADV_FORW_NONE,
};
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index ab13b4d58733..a823d3899bad 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
@@ -23,8 +24,8 @@
#include <linux/cache.h>
#include <linux/errno.h>
#include <linux/export.h>
-#include <linux/fs.h>
#include <linux/genetlink.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -39,6 +40,7 @@
#include <net/genetlink.h>
#include <net/netlink.h>
#include <net/sock.h>
+#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
#include "bat_algo.h"
@@ -46,7 +48,6 @@
#include "gateway_client.h"
#include "hard-interface.h"
#include "originator.h"
-#include "packet.h"
#include "soft-interface.h"
#include "tp_meter.h"
#include "translation-table.h"
@@ -99,7 +100,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
};
/**
- * batadv_netlink_get_ifindex - Extract an interface index from a message
+ * batadv_netlink_get_ifindex() - Extract an interface index from a message
* @nlh: Message header
* @attrtype: Attribute which holds an interface index
*
@@ -114,7 +115,7 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
}
/**
- * batadv_netlink_mesh_info_put - fill in generic information about mesh
+ * batadv_netlink_mesh_info_put() - fill in generic information about mesh
* interface
* @msg: netlink message to be sent back
* @soft_iface: interface for which the data should be taken
@@ -169,7 +170,7 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
}
/**
- * batadv_netlink_get_mesh_info - handle incoming BATADV_CMD_GET_MESH_INFO
+ * batadv_netlink_get_mesh_info() - handle incoming BATADV_CMD_GET_MESH_INFO
* netlink request
* @skb: received netlink message
* @info: receiver information
@@ -230,7 +231,7 @@ batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info)
}
/**
- * batadv_netlink_tp_meter_put - Fill information of started tp_meter session
+ * batadv_netlink_tp_meter_put() - Fill information of started tp_meter session
* @msg: netlink message to be sent back
* @cookie: tp meter session cookie
*
@@ -246,7 +247,7 @@ batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie)
}
/**
- * batadv_netlink_tpmeter_notify - send tp_meter result via netlink to client
+ * batadv_netlink_tpmeter_notify() - send tp_meter result via netlink to client
* @bat_priv: the bat priv with all the soft interface information
* @dst: destination of tp_meter session
* @result: reason for tp meter session stop
@@ -309,7 +310,7 @@ err_genlmsg:
}
/**
- * batadv_netlink_tp_meter_start - Start a new tp_meter session
+ * batadv_netlink_tp_meter_start() - Start a new tp_meter session
* @skb: received netlink message
* @info: receiver information
*
@@ -386,7 +387,7 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
}
/**
- * batadv_netlink_tp_meter_start - Cancel a running tp_meter session
+ * batadv_netlink_tp_meter_start() - Cancel a running tp_meter session
* @skb: received netlink message
* @info: receiver information
*
@@ -431,7 +432,7 @@ out:
}
/**
- * batadv_netlink_dump_hardif_entry - Dump one hard interface into a message
+ * batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -473,7 +474,7 @@ batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_netlink_dump_hardifs - Dump all hard interface into a messages
+ * batadv_netlink_dump_hardifs() - Dump all hard interface into a messages
* @msg: Netlink message to dump into
* @cb: Parameters from query
*
@@ -620,7 +621,7 @@ struct genl_family batadv_netlink_family __ro_after_init = {
};
/**
- * batadv_netlink_register - register batadv genl netlink family
+ * batadv_netlink_register() - register batadv genl netlink family
*/
void __init batadv_netlink_register(void)
{
@@ -632,7 +633,7 @@ void __init batadv_netlink_register(void)
}
/**
- * batadv_netlink_unregister - unregister batadv genl netlink family
+ * batadv_netlink_unregister() - unregister batadv genl netlink family
*/
void batadv_netlink_unregister(void)
{
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index f1cd8c5da966..0e7e57b69b54 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 3604d7899e2c..b48116bb24ef 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
@@ -25,7 +26,7 @@
#include <linux/debugfs.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/init.h>
@@ -35,6 +36,7 @@
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
+#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
#include <linux/random.h>
@@ -47,12 +49,12 @@
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/workqueue.h>
+#include <uapi/linux/batadv_packet.h>
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
#include "originator.h"
-#include "packet.h"
#include "routing.h"
#include "send.h"
#include "tvlv.h"
@@ -65,7 +67,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
/**
- * batadv_nc_init - one-time initialization for network coding
+ * batadv_nc_init() - one-time initialization for network coding
*
* Return: 0 on success or negative error number in case of failure
*/
@@ -81,7 +83,7 @@ int __init batadv_nc_init(void)
}
/**
- * batadv_nc_start_timer - initialise the nc periodic worker
+ * batadv_nc_start_timer() - initialise the nc periodic worker
* @bat_priv: the bat priv with all the soft interface information
*/
static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
@@ -91,7 +93,7 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
}
/**
- * batadv_nc_tvlv_container_update - update the network coding tvlv container
+ * batadv_nc_tvlv_container_update() - update the network coding tvlv container
* after network coding setting change
* @bat_priv: the bat priv with all the soft interface information
*/
@@ -113,7 +115,7 @@ static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv)
}
/**
- * batadv_nc_status_update - update the network coding tvlv container after
+ * batadv_nc_status_update() - update the network coding tvlv container after
* network coding setting change
* @net_dev: the soft interface net device
*/
@@ -125,7 +127,7 @@ void batadv_nc_status_update(struct net_device *net_dev)
}
/**
- * batadv_nc_tvlv_ogm_handler_v1 - process incoming nc tvlv container
+ * batadv_nc_tvlv_ogm_handler_v1() - process incoming nc tvlv container
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
* @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
@@ -144,7 +146,7 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_mesh_init - initialise coding hash table and start house keeping
+ * batadv_nc_mesh_init() - initialise coding hash table and start house keeping
* @bat_priv: the bat priv with all the soft interface information
*
* Return: 0 on success or negative error number in case of failure
@@ -185,7 +187,7 @@ err:
}
/**
- * batadv_nc_init_bat_priv - initialise the nc specific bat_priv variables
+ * batadv_nc_init_bat_priv() - initialise the nc specific bat_priv variables
* @bat_priv: the bat priv with all the soft interface information
*/
void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
@@ -197,7 +199,7 @@ void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
}
/**
- * batadv_nc_init_orig - initialise the nc fields of an orig_node
+ * batadv_nc_init_orig() - initialise the nc fields of an orig_node
* @orig_node: the orig_node which is going to be initialised
*/
void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
@@ -209,8 +211,8 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
}
/**
- * batadv_nc_node_release - release nc_node from lists and queue for free after
- * rcu grace period
+ * batadv_nc_node_release() - release nc_node from lists and queue for free
+ * after rcu grace period
* @ref: kref pointer of the nc_node
*/
static void batadv_nc_node_release(struct kref *ref)
@@ -224,7 +226,7 @@ static void batadv_nc_node_release(struct kref *ref)
}
/**
- * batadv_nc_node_put - decrement the nc_node refcounter and possibly
+ * batadv_nc_node_put() - decrement the nc_node refcounter and possibly
* release it
* @nc_node: nc_node to be free'd
*/
@@ -234,8 +236,8 @@ static void batadv_nc_node_put(struct batadv_nc_node *nc_node)
}
/**
- * batadv_nc_path_release - release nc_path from lists and queue for free after
- * rcu grace period
+ * batadv_nc_path_release() - release nc_path from lists and queue for free
+ * after rcu grace period
* @ref: kref pointer of the nc_path
*/
static void batadv_nc_path_release(struct kref *ref)
@@ -248,7 +250,7 @@ static void batadv_nc_path_release(struct kref *ref)
}
/**
- * batadv_nc_path_put - decrement the nc_path refcounter and possibly
+ * batadv_nc_path_put() - decrement the nc_path refcounter and possibly
* release it
* @nc_path: nc_path to be free'd
*/
@@ -258,7 +260,7 @@ static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
}
/**
- * batadv_nc_packet_free - frees nc packet
+ * batadv_nc_packet_free() - frees nc packet
* @nc_packet: the nc packet to free
* @dropped: whether the packet is freed because is is dropped
*/
@@ -275,7 +277,7 @@ static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet,
}
/**
- * batadv_nc_to_purge_nc_node - checks whether an nc node has to be purged
+ * batadv_nc_to_purge_nc_node() - checks whether an nc node has to be purged
* @bat_priv: the bat priv with all the soft interface information
* @nc_node: the nc node to check
*
@@ -291,7 +293,7 @@ static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_to_purge_nc_path_coding - checks whether an nc path has timed out
+ * batadv_nc_to_purge_nc_path_coding() - checks whether an nc path has timed out
* @bat_priv: the bat priv with all the soft interface information
* @nc_path: the nc path to check
*
@@ -311,7 +313,8 @@ static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_to_purge_nc_path_decoding - checks whether an nc path has timed out
+ * batadv_nc_to_purge_nc_path_decoding() - checks whether an nc path has timed
+ * out
* @bat_priv: the bat priv with all the soft interface information
* @nc_path: the nc path to check
*
@@ -331,7 +334,7 @@ static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_purge_orig_nc_nodes - go through list of nc nodes and purge stale
+ * batadv_nc_purge_orig_nc_nodes() - go through list of nc nodes and purge stale
* entries
* @bat_priv: the bat priv with all the soft interface information
* @list: list of nc nodes
@@ -369,7 +372,7 @@ batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_purge_orig - purges all nc node data attached of the given
+ * batadv_nc_purge_orig() - purges all nc node data attached of the given
* originator
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig_node with the nc node entries to be purged
@@ -395,8 +398,8 @@ void batadv_nc_purge_orig(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_purge_orig_hash - traverse entire originator hash to check if they
- * have timed out nc nodes
+ * batadv_nc_purge_orig_hash() - traverse entire originator hash to check if
+ * they have timed out nc nodes
* @bat_priv: the bat priv with all the soft interface information
*/
static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv)
@@ -422,7 +425,7 @@ static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv)
}
/**
- * batadv_nc_purge_paths - traverse all nc paths part of the hash and remove
+ * batadv_nc_purge_paths() - traverse all nc paths part of the hash and remove
* unused ones
* @bat_priv: the bat priv with all the soft interface information
* @hash: hash table containing the nc paths to check
@@ -481,7 +484,7 @@ static void batadv_nc_purge_paths(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_hash_key_gen - computes the nc_path hash key
+ * batadv_nc_hash_key_gen() - computes the nc_path hash key
* @key: buffer to hold the final hash key
* @src: source ethernet mac address going into the hash key
* @dst: destination ethernet mac address going into the hash key
@@ -494,7 +497,7 @@ static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src,
}
/**
- * batadv_nc_hash_choose - compute the hash value for an nc path
+ * batadv_nc_hash_choose() - compute the hash value for an nc path
* @data: data to hash
* @size: size of the hash table
*
@@ -512,7 +515,7 @@ static u32 batadv_nc_hash_choose(const void *data, u32 size)
}
/**
- * batadv_nc_hash_compare - comparing function used in the network coding hash
+ * batadv_nc_hash_compare() - comparing function used in the network coding hash
* tables
* @node: node in the local table
* @data2: second object to compare the node to
@@ -538,7 +541,7 @@ static bool batadv_nc_hash_compare(const struct hlist_node *node,
}
/**
- * batadv_nc_hash_find - search for an existing nc path and return it
+ * batadv_nc_hash_find() - search for an existing nc path and return it
* @hash: hash table containing the nc path
* @data: search key
*
@@ -575,7 +578,7 @@ batadv_nc_hash_find(struct batadv_hashtable *hash,
}
/**
- * batadv_nc_send_packet - send non-coded packet and free nc_packet struct
+ * batadv_nc_send_packet() - send non-coded packet and free nc_packet struct
* @nc_packet: the nc packet to send
*/
static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
@@ -586,7 +589,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
}
/**
- * batadv_nc_sniffed_purge - Checks timestamp of given sniffed nc_packet.
+ * batadv_nc_sniffed_purge() - Checks timestamp of given sniffed nc_packet.
* @bat_priv: the bat priv with all the soft interface information
* @nc_path: the nc path the packet belongs to
* @nc_packet: the nc packet to be checked
@@ -625,7 +628,7 @@ out:
}
/**
- * batadv_nc_fwd_flush - Checks the timestamp of the given nc packet.
+ * batadv_nc_fwd_flush() - Checks the timestamp of the given nc packet.
* @bat_priv: the bat priv with all the soft interface information
* @nc_path: the nc path the packet belongs to
* @nc_packet: the nc packet to be checked
@@ -663,8 +666,8 @@ static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_process_nc_paths - traverse given nc packet pool and free timed out
- * nc packets
+ * batadv_nc_process_nc_paths() - traverse given nc packet pool and free timed
+ * out nc packets
* @bat_priv: the bat priv with all the soft interface information
* @hash: to be processed hash table
* @process_fn: Function called to process given nc packet. Should return true
@@ -709,7 +712,8 @@ batadv_nc_process_nc_paths(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_worker - periodic task for house keeping related to network coding
+ * batadv_nc_worker() - periodic task for house keeping related to network
+ * coding
* @work: kernel work struct
*/
static void batadv_nc_worker(struct work_struct *work)
@@ -749,8 +753,8 @@ static void batadv_nc_worker(struct work_struct *work)
}
/**
- * batadv_can_nc_with_orig - checks whether the given orig node is suitable for
- * coding or not
+ * batadv_can_nc_with_orig() - checks whether the given orig node is suitable
+ * for coding or not
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: neighboring orig node which may be used as nc candidate
* @ogm_packet: incoming ogm packet also used for the checks
@@ -790,7 +794,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_find_nc_node - search for an existing nc node and return it
+ * batadv_nc_find_nc_node() - search for an existing nc node and return it
* @orig_node: orig node originating the ogm packet
* @orig_neigh_node: neighboring orig node from which we received the ogm packet
* (can be equal to orig_node)
@@ -830,7 +834,7 @@ batadv_nc_find_nc_node(struct batadv_orig_node *orig_node,
}
/**
- * batadv_nc_get_nc_node - retrieves an nc node or creates the entry if it was
+ * batadv_nc_get_nc_node() - retrieves an nc node or creates the entry if it was
* not found
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node originating the ogm packet
@@ -890,7 +894,7 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node
+ * batadv_nc_update_nc_node() - updates stored incoming and outgoing nc node
* structs (best called on incoming OGMs)
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node originating the ogm packet
@@ -945,7 +949,7 @@ out:
}
/**
- * batadv_nc_get_path - get existing nc_path or allocate a new one
+ * batadv_nc_get_path() - get existing nc_path or allocate a new one
* @bat_priv: the bat priv with all the soft interface information
* @hash: hash table containing the nc path
* @src: ethernet source address - first half of the nc path search key
@@ -1006,7 +1010,7 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_random_weight_tq - scale the receivers TQ-value to avoid unfair
+ * batadv_nc_random_weight_tq() - scale the receivers TQ-value to avoid unfair
* selection of a receiver with slightly lower TQ than the other
* @tq: to be weighted tq value
*
@@ -1029,7 +1033,7 @@ static u8 batadv_nc_random_weight_tq(u8 tq)
}
/**
- * batadv_nc_memxor - XOR destination with source
+ * batadv_nc_memxor() - XOR destination with source
* @dst: byte array to XOR into
* @src: byte array to XOR from
* @len: length of destination array
@@ -1043,7 +1047,7 @@ static void batadv_nc_memxor(char *dst, const char *src, unsigned int len)
}
/**
- * batadv_nc_code_packets - code a received unicast_packet with an nc packet
+ * batadv_nc_code_packets() - code a received unicast_packet with an nc packet
* into a coded_packet and send it
* @bat_priv: the bat priv with all the soft interface information
* @skb: data skb to forward
@@ -1236,7 +1240,7 @@ out:
}
/**
- * batadv_nc_skb_coding_possible - true if a decoded skb is available at dst.
+ * batadv_nc_skb_coding_possible() - true if a decoded skb is available at dst.
* @skb: data skb to forward
* @dst: destination mac address of the other skb to code with
* @src: source mac address of skb
@@ -1260,7 +1264,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src)
}
/**
- * batadv_nc_path_search - Find the coding path matching in_nc_node and
+ * batadv_nc_path_search() - Find the coding path matching in_nc_node and
* out_nc_node to retrieve a buffered packet that can be used for coding.
* @bat_priv: the bat priv with all the soft interface information
* @in_nc_node: pointer to skb next hop's neighbor nc node
@@ -1328,8 +1332,8 @@ batadv_nc_path_search(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_skb_src_search - Loops through the list of neighoring nodes of the
- * skb's sender (may be equal to the originator).
+ * batadv_nc_skb_src_search() - Loops through the list of neighoring nodes of
+ * the skb's sender (may be equal to the originator).
* @bat_priv: the bat priv with all the soft interface information
* @skb: data skb to forward
* @eth_dst: next hop mac address of skb
@@ -1374,7 +1378,7 @@ batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_skb_store_before_coding - set the ethernet src and dst of the
+ * batadv_nc_skb_store_before_coding() - set the ethernet src and dst of the
* unicast skb before it is stored for use in later decoding
* @bat_priv: the bat priv with all the soft interface information
* @skb: data skb to store
@@ -1409,7 +1413,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_skb_dst_search - Loops through list of neighboring nodes to dst.
+ * batadv_nc_skb_dst_search() - Loops through list of neighboring nodes to dst.
* @skb: data skb to forward
* @neigh_node: next hop to forward packet to
* @ethhdr: pointer to the ethernet header inside the skb
@@ -1467,7 +1471,7 @@ static bool batadv_nc_skb_dst_search(struct sk_buff *skb,
}
/**
- * batadv_nc_skb_add_to_path - buffer skb for later encoding / decoding
+ * batadv_nc_skb_add_to_path() - buffer skb for later encoding / decoding
* @skb: skb to add to path
* @nc_path: path to add skb to
* @neigh_node: next hop to forward packet to
@@ -1502,7 +1506,7 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
}
/**
- * batadv_nc_skb_forward - try to code a packet or add it to the coding packet
+ * batadv_nc_skb_forward() - try to code a packet or add it to the coding packet
* buffer
* @skb: data skb to forward
* @neigh_node: next hop to forward packet to
@@ -1559,8 +1563,8 @@ out:
}
/**
- * batadv_nc_skb_store_for_decoding - save a clone of the skb which can be used
- * when decoding coded packets
+ * batadv_nc_skb_store_for_decoding() - save a clone of the skb which can be
+ * used when decoding coded packets
* @bat_priv: the bat priv with all the soft interface information
* @skb: data skb to store
*/
@@ -1620,7 +1624,7 @@ out:
}
/**
- * batadv_nc_skb_store_sniffed_unicast - check if a received unicast packet
+ * batadv_nc_skb_store_sniffed_unicast() - check if a received unicast packet
* should be saved in the decoding buffer and, if so, store it there
* @bat_priv: the bat priv with all the soft interface information
* @skb: unicast skb to store
@@ -1640,7 +1644,7 @@ void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_skb_decode_packet - decode given skb using the decode data stored
+ * batadv_nc_skb_decode_packet() - decode given skb using the decode data stored
* in nc_packet
* @bat_priv: the bat priv with all the soft interface information
* @skb: unicast skb to decode
@@ -1734,7 +1738,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
}
/**
- * batadv_nc_find_decoding_packet - search through buffered decoding data to
+ * batadv_nc_find_decoding_packet() - search through buffered decoding data to
* find the data needed to decode the coded packet
* @bat_priv: the bat priv with all the soft interface information
* @ethhdr: pointer to the ethernet header inside the coded packet
@@ -1799,7 +1803,7 @@ batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_recv_coded_packet - try to decode coded packet and enqueue the
+ * batadv_nc_recv_coded_packet() - try to decode coded packet and enqueue the
* resulting unicast packet
* @skb: incoming coded packet
* @recv_if: pointer to interface this packet was received on
@@ -1874,7 +1878,7 @@ free_skb:
}
/**
- * batadv_nc_mesh_free - clean up network coding memory
+ * batadv_nc_mesh_free() - clean up network coding memory
* @bat_priv: the bat priv with all the soft interface information
*/
void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
@@ -1891,7 +1895,7 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
- * batadv_nc_nodes_seq_print_text - print the nc node information
+ * batadv_nc_nodes_seq_print_text() - print the nc node information
* @seq: seq file to print on
* @offset: not used
*
@@ -1954,7 +1958,7 @@ out:
}
/**
- * batadv_nc_init_debugfs - create nc folder and related files in debugfs
+ * batadv_nc_init_debugfs() - create nc folder and related files in debugfs
* @bat_priv: the bat priv with all the soft interface information
*
* Return: 0 on success or negative error number in case of failure
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index c66efb81d2f4..adaeafa4f71e 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 2967b86c13da..58a7d9274435 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -21,7 +22,7 @@
#include <linux/atomic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/kref.h>
@@ -30,10 +31,12 @@
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/rculist.h>
+#include <linux/rcupdate.h>
#include <linux/seq_file.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/stddef.h>
#include <linux/workqueue.h>
#include <net/sock.h>
#include <uapi/linux/batman_adv.h>
@@ -55,10 +58,47 @@
/* hash class keys */
static struct lock_class_key batadv_orig_hash_lock_class_key;
+/**
+ * batadv_orig_hash_find() - Find and return originator from orig_hash
+ * @bat_priv: the bat priv with all the soft interface information
+ * @data: mac address of the originator
+ *
+ * Return: orig_node (with increased refcnt), NULL on errors
+ */
+struct batadv_orig_node *
+batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data)
+{
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ struct hlist_head *head;
+ struct batadv_orig_node *orig_node, *orig_node_tmp = NULL;
+ int index;
+
+ if (!hash)
+ return NULL;
+
+ index = batadv_choose_orig(data, hash->size);
+ head = &hash->table[index];
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ if (!batadv_compare_eth(orig_node, data))
+ continue;
+
+ if (!kref_get_unless_zero(&orig_node->refcount))
+ continue;
+
+ orig_node_tmp = orig_node;
+ break;
+ }
+ rcu_read_unlock();
+
+ return orig_node_tmp;
+}
+
static void batadv_purge_orig(struct work_struct *work);
/**
- * batadv_compare_orig - comparing function used in the originator hash table
+ * batadv_compare_orig() - comparing function used in the originator hash table
* @node: node in the local table
* @data2: second object to compare the node to
*
@@ -73,7 +113,7 @@ bool batadv_compare_orig(const struct hlist_node *node, const void *data2)
}
/**
- * batadv_orig_node_vlan_get - get an orig_node_vlan object
+ * batadv_orig_node_vlan_get() - get an orig_node_vlan object
* @orig_node: the originator serving the VLAN
* @vid: the VLAN identifier
*
@@ -104,7 +144,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
}
/**
- * batadv_orig_node_vlan_new - search and possibly create an orig_node_vlan
+ * batadv_orig_node_vlan_new() - search and possibly create an orig_node_vlan
* object
* @orig_node: the originator serving the VLAN
* @vid: the VLAN identifier
@@ -145,7 +185,7 @@ out:
}
/**
- * batadv_orig_node_vlan_release - release originator-vlan object from lists
+ * batadv_orig_node_vlan_release() - release originator-vlan object from lists
* and queue for free after rcu grace period
* @ref: kref pointer of the originator-vlan object
*/
@@ -159,7 +199,7 @@ static void batadv_orig_node_vlan_release(struct kref *ref)
}
/**
- * batadv_orig_node_vlan_put - decrement the refcounter and possibly release
+ * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release
* the originator-vlan object
* @orig_vlan: the originator-vlan object to release
*/
@@ -168,6 +208,12 @@ void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
}
+/**
+ * batadv_originator_init() - Initialize all originator structures
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_originator_init(struct batadv_priv *bat_priv)
{
if (bat_priv->orig_hash)
@@ -193,7 +239,7 @@ err:
}
/**
- * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for
+ * batadv_neigh_ifinfo_release() - release neigh_ifinfo from lists and queue for
* free after rcu grace period
* @ref: kref pointer of the neigh_ifinfo
*/
@@ -210,7 +256,7 @@ static void batadv_neigh_ifinfo_release(struct kref *ref)
}
/**
- * batadv_neigh_ifinfo_put - decrement the refcounter and possibly release
+ * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release
* the neigh_ifinfo
* @neigh_ifinfo: the neigh_ifinfo object to release
*/
@@ -220,7 +266,7 @@ void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
}
/**
- * batadv_hardif_neigh_release - release hardif neigh node from lists and
+ * batadv_hardif_neigh_release() - release hardif neigh node from lists and
* queue for free after rcu grace period
* @ref: kref pointer of the neigh_node
*/
@@ -240,7 +286,7 @@ static void batadv_hardif_neigh_release(struct kref *ref)
}
/**
- * batadv_hardif_neigh_put - decrement the hardif neighbors refcounter
+ * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter
* and possibly release it
* @hardif_neigh: hardif neigh neighbor to free
*/
@@ -250,7 +296,7 @@ void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
}
/**
- * batadv_neigh_node_release - release neigh_node from lists and queue for
+ * batadv_neigh_node_release() - release neigh_node from lists and queue for
* free after rcu grace period
* @ref: kref pointer of the neigh_node
*/
@@ -275,7 +321,7 @@ static void batadv_neigh_node_release(struct kref *ref)
}
/**
- * batadv_neigh_node_put - decrement the neighbors refcounter and possibly
+ * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly
* release it
* @neigh_node: neigh neighbor to free
*/
@@ -285,7 +331,7 @@ void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
}
/**
- * batadv_orig_router_get - router to the originator depending on iface
+ * batadv_orig_router_get() - router to the originator depending on iface
* @orig_node: the orig node for the router
* @if_outgoing: the interface where the payload packet has been received or
* the OGM should be sent to
@@ -318,7 +364,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node,
}
/**
- * batadv_orig_ifinfo_get - find the ifinfo from an orig_node
+ * batadv_orig_ifinfo_get() - find the ifinfo from an orig_node
* @orig_node: the orig node to be queried
* @if_outgoing: the interface for which the ifinfo should be acquired
*
@@ -350,7 +396,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
}
/**
- * batadv_orig_ifinfo_new - search and possibly create an orig_ifinfo object
+ * batadv_orig_ifinfo_new() - search and possibly create an orig_ifinfo object
* @orig_node: the orig node to be queried
* @if_outgoing: the interface for which the ifinfo should be acquired
*
@@ -396,7 +442,7 @@ out:
}
/**
- * batadv_neigh_ifinfo_get - find the ifinfo from an neigh_node
+ * batadv_neigh_ifinfo_get() - find the ifinfo from an neigh_node
* @neigh: the neigh node to be queried
* @if_outgoing: the interface for which the ifinfo should be acquired
*
@@ -429,7 +475,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
}
/**
- * batadv_neigh_ifinfo_new - search and possibly create an neigh_ifinfo object
+ * batadv_neigh_ifinfo_new() - search and possibly create an neigh_ifinfo object
* @neigh: the neigh node to be queried
* @if_outgoing: the interface for which the ifinfo should be acquired
*
@@ -472,7 +518,7 @@ out:
}
/**
- * batadv_neigh_node_get - retrieve a neighbour from the list
+ * batadv_neigh_node_get() - retrieve a neighbour from the list
* @orig_node: originator which the neighbour belongs to
* @hard_iface: the interface where this neighbour is connected to
* @addr: the address of the neighbour
@@ -509,7 +555,7 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
}
/**
- * batadv_hardif_neigh_create - create a hardif neighbour node
+ * batadv_hardif_neigh_create() - create a hardif neighbour node
* @hard_iface: the interface this neighbour is connected to
* @neigh_addr: the interface address of the neighbour to retrieve
* @orig_node: originator object representing the neighbour
@@ -555,7 +601,7 @@ out:
}
/**
- * batadv_hardif_neigh_get_or_create - retrieve or create a hardif neighbour
+ * batadv_hardif_neigh_get_or_create() - retrieve or create a hardif neighbour
* node
* @hard_iface: the interface this neighbour is connected to
* @neigh_addr: the interface address of the neighbour to retrieve
@@ -579,7 +625,7 @@ batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
}
/**
- * batadv_hardif_neigh_get - retrieve a hardif neighbour from the list
+ * batadv_hardif_neigh_get() - retrieve a hardif neighbour from the list
* @hard_iface: the interface where this neighbour is connected to
* @neigh_addr: the address of the neighbour
*
@@ -611,7 +657,7 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
}
/**
- * batadv_neigh_node_create - create a neigh node object
+ * batadv_neigh_node_create() - create a neigh node object
* @orig_node: originator object representing the neighbour
* @hard_iface: the interface where the neighbour is connected to
* @neigh_addr: the mac address of the neighbour interface
@@ -676,7 +722,7 @@ out:
}
/**
- * batadv_neigh_node_get_or_create - retrieve or create a neigh node object
+ * batadv_neigh_node_get_or_create() - retrieve or create a neigh node object
* @orig_node: originator object representing the neighbour
* @hard_iface: the interface where the neighbour is connected to
* @neigh_addr: the mac address of the neighbour interface
@@ -700,7 +746,7 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
- * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list
+ * batadv_hardif_neigh_seq_print_text() - print the single hop neighbour list
* @seq: neighbour table seq_file struct
* @offset: not used
*
@@ -735,8 +781,8 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
#endif
/**
- * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific
- * outgoing interface
+ * batadv_hardif_neigh_dump() - Dump to netlink the neighbor infos for a
+ * specific outgoing interface
* @msg: message to dump into
* @cb: parameters for the dump
*
@@ -812,7 +858,7 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
}
/**
- * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
+ * batadv_orig_ifinfo_release() - release orig_ifinfo from lists and queue for
* free after rcu grace period
* @ref: kref pointer of the orig_ifinfo
*/
@@ -835,7 +881,7 @@ static void batadv_orig_ifinfo_release(struct kref *ref)
}
/**
- * batadv_orig_ifinfo_put - decrement the refcounter and possibly release
+ * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release
* the orig_ifinfo
* @orig_ifinfo: the orig_ifinfo object to release
*/
@@ -845,7 +891,7 @@ void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
}
/**
- * batadv_orig_node_free_rcu - free the orig_node
+ * batadv_orig_node_free_rcu() - free the orig_node
* @rcu: rcu pointer of the orig_node
*/
static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
@@ -866,7 +912,7 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
}
/**
- * batadv_orig_node_release - release orig_node from lists and queue for
+ * batadv_orig_node_release() - release orig_node from lists and queue for
* free after rcu grace period
* @ref: kref pointer of the orig_node
*/
@@ -917,7 +963,7 @@ static void batadv_orig_node_release(struct kref *ref)
}
/**
- * batadv_orig_node_put - decrement the orig node refcounter and possibly
+ * batadv_orig_node_put() - decrement the orig node refcounter and possibly
* release it
* @orig_node: the orig node to free
*/
@@ -926,6 +972,10 @@ void batadv_orig_node_put(struct batadv_orig_node *orig_node)
kref_put(&orig_node->refcount, batadv_orig_node_release);
}
+/**
+ * batadv_originator_free() - Free all originator structures
+ * @bat_priv: the bat priv with all the soft interface information
+ */
void batadv_originator_free(struct batadv_priv *bat_priv)
{
struct batadv_hashtable *hash = bat_priv->orig_hash;
@@ -959,7 +1009,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
}
/**
- * batadv_orig_node_new - creates a new orig_node
+ * batadv_orig_node_new() - creates a new orig_node
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the originator
*
@@ -1038,7 +1088,7 @@ free_orig_node:
}
/**
- * batadv_purge_neigh_ifinfo - purge obsolete ifinfo entries from neighbor
+ * batadv_purge_neigh_ifinfo() - purge obsolete ifinfo entries from neighbor
* @bat_priv: the bat priv with all the soft interface information
* @neigh: orig node which is to be checked
*/
@@ -1079,7 +1129,7 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
}
/**
- * batadv_purge_orig_ifinfo - purge obsolete ifinfo entries from originator
+ * batadv_purge_orig_ifinfo() - purge obsolete ifinfo entries from originator
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node which is to be checked
*
@@ -1131,7 +1181,7 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
}
/**
- * batadv_purge_orig_neighbors - purges neighbors from originator
+ * batadv_purge_orig_neighbors() - purges neighbors from originator
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node which is to be checked
*
@@ -1189,7 +1239,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
}
/**
- * batadv_find_best_neighbor - finds the best neighbor after purging
+ * batadv_find_best_neighbor() - finds the best neighbor after purging
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node which is to be checked
* @if_outgoing: the interface for which the metric should be compared
@@ -1224,7 +1274,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
}
/**
- * batadv_purge_orig_node - purges obsolete information from an orig_node
+ * batadv_purge_orig_node() - purges obsolete information from an orig_node
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node which is to be checked
*
@@ -1341,12 +1391,24 @@ static void batadv_purge_orig(struct work_struct *work)
msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD));
}
+/**
+ * batadv_purge_orig_ref() - Purge all outdated originators
+ * @bat_priv: the bat priv with all the soft interface information
+ */
void batadv_purge_orig_ref(struct batadv_priv *bat_priv)
{
_batadv_purge_orig(bat_priv);
}
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+
+/**
+ * batadv_orig_seq_print_text() - Print the originator table in a seq file
+ * @seq: seq file to print on
+ * @offset: not used
+ *
+ * Return: always 0
+ */
int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
{
struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1376,7 +1438,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
}
/**
- * batadv_orig_hardif_seq_print_text - writes originator infos for a specific
+ * batadv_orig_hardif_seq_print_text() - writes originator infos for a specific
* outgoing interface
* @seq: debugfs table seq_file struct
* @offset: not used
@@ -1423,7 +1485,7 @@ out:
#endif
/**
- * batadv_orig_dump - Dump to netlink the originator infos for a specific
+ * batadv_orig_dump() - Dump to netlink the originator infos for a specific
* outgoing interface
* @msg: message to dump into
* @cb: parameters for the dump
@@ -1499,6 +1561,13 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
return ret;
}
+/**
+ * batadv_orig_hash_add_if() - Add interface to originators in orig_hash
+ * @hard_iface: hard interface to add (already slave of the soft interface)
+ * @max_if_num: new number of interfaces
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
int max_if_num)
{
@@ -1534,6 +1603,13 @@ err:
return -ENOMEM;
}
+/**
+ * batadv_orig_hash_del_if() - Remove interface from originators in orig_hash
+ * @hard_iface: hard interface to remove (still slave of the soft interface)
+ * @max_if_num: new number of interfaces
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
int max_if_num)
{
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index d94220a6d21a..8e543a3cdc6c 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -23,14 +24,8 @@
#include <linux/compiler.h>
#include <linux/if_ether.h>
#include <linux/jhash.h>
-#include <linux/kref.h>
-#include <linux/rculist.h>
-#include <linux/rcupdate.h>
-#include <linux/stddef.h>
#include <linux/types.h>
-#include "hash.h"
-
struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -89,8 +84,13 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
unsigned short vid);
void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan);
-/* hashfunction to choose an entry in a hash table of given size
- * hash algorithm from http://en.wikipedia.org/wiki/Hash_table
+/**
+ * batadv_choose_orig() - Return the index of the orig entry in the hash table
+ * @data: mac address of the originator node
+ * @size: the size of the hash table
+ *
+ * Return: the hash index where the object represented by @data should be
+ * stored at.
*/
static inline u32 batadv_choose_orig(const void *data, u32 size)
{
@@ -100,34 +100,7 @@ static inline u32 batadv_choose_orig(const void *data, u32 size)
return hash % size;
}
-static inline struct batadv_orig_node *
-batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data)
-{
- struct batadv_hashtable *hash = bat_priv->orig_hash;
- struct hlist_head *head;
- struct batadv_orig_node *orig_node, *orig_node_tmp = NULL;
- int index;
-
- if (!hash)
- return NULL;
-
- index = batadv_choose_orig(data, hash->size);
- head = &hash->table[index];
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- if (!batadv_compare_eth(orig_node, data))
- continue;
-
- if (!kref_get_unless_zero(&orig_node->refcount))
- continue;
-
- orig_node_tmp = orig_node;
- break;
- }
- rcu_read_unlock();
-
- return orig_node_tmp;
-}
+struct batadv_orig_node *
+batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data);
#endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
deleted file mode 100644
index 8e8a5db197cb..000000000000
--- a/net/batman-adv/packet.h
+++ /dev/null
@@ -1,621 +0,0 @@
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
- *
- * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _NET_BATMAN_ADV_PACKET_H_
-#define _NET_BATMAN_ADV_PACKET_H_
-
-#include <asm/byteorder.h>
-#include <linux/types.h>
-
-#define batadv_tp_is_error(n) ((u8)(n) > 127 ? 1 : 0)
-
-/**
- * enum batadv_packettype - types for batman-adv encapsulated packets
- * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV
- * @BATADV_BCAST: broadcast packets carrying broadcast payload
- * @BATADV_CODED: network coded packets
- * @BATADV_ELP: echo location packets for B.A.T.M.A.N. V
- * @BATADV_OGM2: originator messages for B.A.T.M.A.N. V
- *
- * @BATADV_UNICAST: unicast packets carrying unicast payload traffic
- * @BATADV_UNICAST_FRAG: unicast packets carrying a fragment of the original
- * payload packet
- * @BATADV_UNICAST_4ADDR: unicast packet including the originator address of
- * the sender
- * @BATADV_ICMP: unicast packet like IP ICMP used for ping or traceroute
- * @BATADV_UNICAST_TVLV: unicast packet carrying TVLV containers
- */
-enum batadv_packettype {
- /* 0x00 - 0x3f: local packets or special rules for handling */
- BATADV_IV_OGM = 0x00,
- BATADV_BCAST = 0x01,
- BATADV_CODED = 0x02,
- BATADV_ELP = 0x03,
- BATADV_OGM2 = 0x04,
- /* 0x40 - 0x7f: unicast */
-#define BATADV_UNICAST_MIN 0x40
- BATADV_UNICAST = 0x40,
- BATADV_UNICAST_FRAG = 0x41,
- BATADV_UNICAST_4ADDR = 0x42,
- BATADV_ICMP = 0x43,
- BATADV_UNICAST_TVLV = 0x44,
-#define BATADV_UNICAST_MAX 0x7f
- /* 0x80 - 0xff: reserved */
-};
-
-/**
- * enum batadv_subtype - packet subtype for unicast4addr
- * @BATADV_P_DATA: user payload
- * @BATADV_P_DAT_DHT_GET: DHT request message
- * @BATADV_P_DAT_DHT_PUT: DHT store message
- * @BATADV_P_DAT_CACHE_REPLY: ARP reply generated by DAT
- */
-enum batadv_subtype {
- BATADV_P_DATA = 0x01,
- BATADV_P_DAT_DHT_GET = 0x02,
- BATADV_P_DAT_DHT_PUT = 0x03,
- BATADV_P_DAT_CACHE_REPLY = 0x04,
-};
-
-/* this file is included by batctl which needs these defines */
-#define BATADV_COMPAT_VERSION 15
-
-/**
- * enum batadv_iv_flags - flags used in B.A.T.M.A.N. IV OGM packets
- * @BATADV_NOT_BEST_NEXT_HOP: flag is set when ogm packet is forwarded and was
- * previously received from someone else than the best neighbor.
- * @BATADV_PRIMARIES_FIRST_HOP: flag unused.
- * @BATADV_DIRECTLINK: flag is for the first hop or if rebroadcasted from a
- * one hop neighbor on the interface where it was originally received.
- */
-enum batadv_iv_flags {
- BATADV_NOT_BEST_NEXT_HOP = BIT(0),
- BATADV_PRIMARIES_FIRST_HOP = BIT(1),
- BATADV_DIRECTLINK = BIT(2),
-};
-
-/* ICMP message types */
-enum batadv_icmp_packettype {
- BATADV_ECHO_REPLY = 0,
- BATADV_DESTINATION_UNREACHABLE = 3,
- BATADV_ECHO_REQUEST = 8,
- BATADV_TTL_EXCEEDED = 11,
- BATADV_PARAMETER_PROBLEM = 12,
- BATADV_TP = 15,
-};
-
-/**
- * enum batadv_mcast_flags - flags for multicast capabilities and settings
- * @BATADV_MCAST_WANT_ALL_UNSNOOPABLES: we want all packets destined for
- * 224.0.0.0/24 or ff02::1
- * @BATADV_MCAST_WANT_ALL_IPV4: we want all IPv4 multicast packets
- * @BATADV_MCAST_WANT_ALL_IPV6: we want all IPv6 multicast packets
- */
-enum batadv_mcast_flags {
- BATADV_MCAST_WANT_ALL_UNSNOOPABLES = BIT(0),
- BATADV_MCAST_WANT_ALL_IPV4 = BIT(1),
- BATADV_MCAST_WANT_ALL_IPV6 = BIT(2),
-};
-
-/* tt data subtypes */
-#define BATADV_TT_DATA_TYPE_MASK 0x0F
-
-/**
- * enum batadv_tt_data_flags - flags for tt data tvlv
- * @BATADV_TT_OGM_DIFF: TT diff propagated through OGM
- * @BATADV_TT_REQUEST: TT request message
- * @BATADV_TT_RESPONSE: TT response message
- * @BATADV_TT_FULL_TABLE: contains full table to replace existing table
- */
-enum batadv_tt_data_flags {
- BATADV_TT_OGM_DIFF = BIT(0),
- BATADV_TT_REQUEST = BIT(1),
- BATADV_TT_RESPONSE = BIT(2),
- BATADV_TT_FULL_TABLE = BIT(4),
-};
-
-/**
- * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field
- * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
- */
-enum batadv_vlan_flags {
- BATADV_VLAN_HAS_TAG = BIT(15),
-};
-
-/* claim frame types for the bridge loop avoidance */
-enum batadv_bla_claimframe {
- BATADV_CLAIM_TYPE_CLAIM = 0x00,
- BATADV_CLAIM_TYPE_UNCLAIM = 0x01,
- BATADV_CLAIM_TYPE_ANNOUNCE = 0x02,
- BATADV_CLAIM_TYPE_REQUEST = 0x03,
- BATADV_CLAIM_TYPE_LOOPDETECT = 0x04,
-};
-
-/**
- * enum batadv_tvlv_type - tvlv type definitions
- * @BATADV_TVLV_GW: gateway tvlv
- * @BATADV_TVLV_DAT: distributed arp table tvlv
- * @BATADV_TVLV_NC: network coding tvlv
- * @BATADV_TVLV_TT: translation table tvlv
- * @BATADV_TVLV_ROAM: roaming advertisement tvlv
- * @BATADV_TVLV_MCAST: multicast capability tvlv
- */
-enum batadv_tvlv_type {
- BATADV_TVLV_GW = 0x01,
- BATADV_TVLV_DAT = 0x02,
- BATADV_TVLV_NC = 0x03,
- BATADV_TVLV_TT = 0x04,
- BATADV_TVLV_ROAM = 0x05,
- BATADV_TVLV_MCAST = 0x06,
-};
-
-#pragma pack(2)
-/* the destination hardware field in the ARP frame is used to
- * transport the claim type and the group id
- */
-struct batadv_bla_claim_dst {
- u8 magic[3]; /* FF:43:05 */
- u8 type; /* bla_claimframe */
- __be16 group; /* group id */
-};
-
-#pragma pack()
-
-/**
- * struct batadv_ogm_packet - ogm (routing protocol) packet
- * @packet_type: batman-adv packet type, part of the general header
- * @version: batman-adv protocol version, part of the genereal header
- * @ttl: time to live for this packet, part of the genereal header
- * @flags: contains routing relevant flags - see enum batadv_iv_flags
- * @seqno: sequence identification
- * @orig: address of the source node
- * @prev_sender: address of the previous sender
- * @reserved: reserved byte for alignment
- * @tq: transmission quality
- * @tvlv_len: length of tvlv data following the ogm header
- */
-struct batadv_ogm_packet {
- u8 packet_type;
- u8 version;
- u8 ttl;
- u8 flags;
- __be32 seqno;
- u8 orig[ETH_ALEN];
- u8 prev_sender[ETH_ALEN];
- u8 reserved;
- u8 tq;
- __be16 tvlv_len;
- /* __packed is not needed as the struct size is divisible by 4,
- * and the largest data type in this struct has a size of 4.
- */
-};
-
-#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
-
-/**
- * struct batadv_ogm2_packet - ogm2 (routing protocol) packet
- * @packet_type: batman-adv packet type, part of the general header
- * @version: batman-adv protocol version, part of the general header
- * @ttl: time to live for this packet, part of the general header
- * @flags: reseved for routing relevant flags - currently always 0
- * @seqno: sequence number
- * @orig: originator mac address
- * @tvlv_len: length of the appended tvlv buffer (in bytes)
- * @throughput: the currently flooded path throughput
- */
-struct batadv_ogm2_packet {
- u8 packet_type;
- u8 version;
- u8 ttl;
- u8 flags;
- __be32 seqno;
- u8 orig[ETH_ALEN];
- __be16 tvlv_len;
- __be32 throughput;
- /* __packed is not needed as the struct size is divisible by 4,
- * and the largest data type in this struct has a size of 4.
- */
-};
-
-#define BATADV_OGM2_HLEN sizeof(struct batadv_ogm2_packet)
-
-/**
- * struct batadv_elp_packet - elp (neighbor discovery) packet
- * @packet_type: batman-adv packet type, part of the general header
- * @version: batman-adv protocol version, part of the genereal header
- * @orig: originator mac address
- * @seqno: sequence number
- * @elp_interval: currently used ELP sending interval in ms
- */
-struct batadv_elp_packet {
- u8 packet_type;
- u8 version;
- u8 orig[ETH_ALEN];
- __be32 seqno;
- __be32 elp_interval;
-};
-
-#define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet)
-
-/**
- * struct batadv_icmp_header - common members among all the ICMP packets
- * @packet_type: batman-adv packet type, part of the general header
- * @version: batman-adv protocol version, part of the genereal header
- * @ttl: time to live for this packet, part of the genereal header
- * @msg_type: ICMP packet type
- * @dst: address of the destination node
- * @orig: address of the source node
- * @uid: local ICMP socket identifier
- * @align: not used - useful for alignment purposes only
- *
- * This structure is used for ICMP packets parsing only and it is never sent
- * over the wire. The alignment field at the end is there to ensure that
- * members are padded the same way as they are in real packets.
- */
-struct batadv_icmp_header {
- u8 packet_type;
- u8 version;
- u8 ttl;
- u8 msg_type; /* see ICMP message types above */
- u8 dst[ETH_ALEN];
- u8 orig[ETH_ALEN];
- u8 uid;
- u8 align[3];
-};
-
-/**
- * struct batadv_icmp_packet - ICMP packet
- * @packet_type: batman-adv packet type, part of the general header
- * @version: batman-adv protocol version, part of the genereal header
- * @ttl: time to live for this packet, part of the genereal header
- * @msg_type: ICMP packet type
- * @dst: address of the destination node
- * @orig: address of the source node
- * @uid: local ICMP socket identifier
- * @reserved: not used - useful for alignment
- * @seqno: ICMP sequence number
- */
-struct batadv_icmp_packet {
- u8 packet_type;
- u8 version;
- u8 ttl;
- u8 msg_type; /* see ICMP message types above */
- u8 dst[ETH_ALEN];
- u8 orig[ETH_ALEN];
- u8 uid;
- u8 reserved;
- __be16 seqno;
-};
-
-/**
- * struct batadv_icmp_tp_packet - ICMP TP Meter packet
- * @packet_type: batman-adv packet type, part of the general header
- * @version: batman-adv protocol version, part of the genereal header
- * @ttl: time to live for this packet, part of the genereal header
- * @msg_type: ICMP packet type
- * @dst: address of the destination node
- * @orig: address of the source node
- * @uid: local ICMP socket identifier
- * @subtype: TP packet subtype (see batadv_icmp_tp_subtype)
- * @session: TP session identifier
- * @seqno: the TP sequence number
- * @timestamp: time when the packet has been sent. This value is filled in a
- * TP_MSG and echoed back in the next TP_ACK so that the sender can compute the
- * RTT. Since it is read only by the host which wrote it, there is no need to
- * store it using network order
- */
-struct batadv_icmp_tp_packet {
- u8 packet_type;
- u8 version;
- u8 ttl;
- u8 msg_type; /* see ICMP message types above */
- u8 dst[ETH_ALEN];
- u8 orig[ETH_ALEN];
- u8 uid;
- u8 subtype;
- u8 session[2];
- __be32 seqno;
- __be32 timestamp;
-};
-
-/**
- * enum batadv_icmp_tp_subtype - ICMP TP Meter packet subtypes
- * @BATADV_TP_MSG: Msg from sender to receiver
- * @BATADV_TP_ACK: acknowledgment from receiver to sender
- */
-enum batadv_icmp_tp_subtype {
- BATADV_TP_MSG = 0,
- BATADV_TP_ACK,
-};
-
-#define BATADV_RR_LEN 16
-
-/**
- * struct batadv_icmp_packet_rr - ICMP RouteRecord packet
- * @packet_type: batman-adv packet type, part of the general header
- * @version: batman-adv protocol version, part of the genereal header
- * @ttl: time to live for this packet, part of the genereal header
- * @msg_type: ICMP packet type
- * @dst: address of the destination node
- * @orig: address of the source node
- * @uid: local ICMP socket identifier
- * @rr_cur: number of entries the rr array
- * @seqno: ICMP sequence number
- * @rr: route record array
- */
-struct batadv_icmp_packet_rr {
- u8 packet_type;
- u8 version;
- u8 ttl;
- u8 msg_type; /* see ICMP message types above */
- u8 dst[ETH_ALEN];
- u8 orig[ETH_ALEN];
- u8 uid;
- u8 rr_cur;
- __be16 seqno;
- u8 rr[BATADV_RR_LEN][ETH_ALEN];
-};
-
-#define BATADV_ICMP_MAX_PACKET_SIZE sizeof(struct batadv_icmp_packet_rr)
-
-/* All packet headers in front of an ethernet header have to be completely
- * divisible by 2 but not by 4 to make the payload after the ethernet
- * header again 4 bytes boundary aligned.
- *
- * A packing of 2 is necessary to avoid extra padding at the end of the struct
- * caused by a structure member which is larger than two bytes. Otherwise
- * the structure would not fulfill the previously mentioned rule to avoid the
- * misalignment of the payload after the ethernet header. It may also lead to
- * leakage of information when the padding it not initialized before sending.
- */
-#pragma pack(2)
-
-/**
- * struct batadv_unicast_packet - unicast packet for network payload
- * @packet_type: batman-adv packet type, part of the general header
- * @version: batman-adv protocol version, part of the genereal header
- * @ttl: time to live for this packet, part of the genereal header
- * @ttvn: translation table version number
- * @dest: originator destination of the unicast packet
- */
-struct batadv_unicast_packet {
- u8 packet_type;
- u8 version;
- u8 ttl;
- u8 ttvn; /* destination translation table version number */
- u8 dest[ETH_ALEN];
- /* "4 bytes boundary + 2 bytes" long to make the payload after the
- * following ethernet header again 4 bytes boundary aligned
- */
-};
-
-/**
- * struct batadv_unicast_4addr_packet - extended unicast packet
- * @u: common unicast packet header
- * @src: address of the source
- * @subtype: packet subtype
- * @reserved: reserved byte for alignment
- */
-struct batadv_unicast_4addr_packet {
- struct batadv_unicast_packet u;
- u8 src[ETH_ALEN];
- u8 subtype;
- u8 reserved;
- /* "4 bytes boundary + 2 bytes" long to make the payload after the
- * following ethernet header again 4 bytes boundary aligned
- */
-};
-
-/**
- * struct batadv_frag_packet - fragmented packet
- * @packet_type: batman-adv packet type, part of the general header
- * @version: batman-adv protocol version, part of the genereal header
- * @ttl: time to live for this packet, part of the genereal header
- * @dest: final destination used when routing fragments
- * @orig: originator of the fragment used when merging the packet
- * @no: fragment number within this sequence
- * @priority: priority of frame, from ToS IP precedence or 802.1p
- * @reserved: reserved byte for alignment
- * @seqno: sequence identification
- * @total_size: size of the merged packet
- */
-struct batadv_frag_packet {
- u8 packet_type;
- u8 version; /* batman version field */
- u8 ttl;
-#if defined(__BIG_ENDIAN_BITFIELD)
- u8 no:4;
- u8 priority:3;
- u8 reserved:1;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u8 reserved:1;
- u8 priority:3;
- u8 no:4;
-#else
-#error "unknown bitfield endianness"
-#endif
- u8 dest[ETH_ALEN];
- u8 orig[ETH_ALEN];
- __be16 seqno;
- __be16 total_size;
-};
-
-/**
- * struct batadv_bcast_packet - broadcast packet for network payload
- * @packet_type: batman-adv packet type, part of the general header
- * @version: batman-adv protocol version, part of the genereal header
- * @ttl: time to live for this packet, part of the genereal header
- * @reserved: reserved byte for alignment
- * @seqno: sequence identification
- * @orig: originator of the broadcast packet
- */
-struct batadv_bcast_packet {
- u8 packet_type;
- u8 version; /* batman version field */
- u8 ttl;
- u8 reserved;
- __be32 seqno;
- u8 orig[ETH_ALEN];
- /* "4 bytes boundary + 2 bytes" long to make the payload after the
- * following ethernet header again 4 bytes boundary aligned
- */
-};
-
-/**
- * struct batadv_coded_packet - network coded packet
- * @packet_type: batman-adv packet type, part of the general header
- * @version: batman-adv protocol version, part of the genereal header
- * @ttl: time to live for this packet, part of the genereal header
- * @first_source: original source of first included packet
- * @first_orig_dest: original destinal of first included packet
- * @first_crc: checksum of first included packet
- * @first_ttvn: tt-version number of first included packet
- * @second_ttl: ttl of second packet
- * @second_dest: second receiver of this coded packet
- * @second_source: original source of second included packet
- * @second_orig_dest: original destination of second included packet
- * @second_crc: checksum of second included packet
- * @second_ttvn: tt version number of second included packet
- * @coded_len: length of network coded part of the payload
- */
-struct batadv_coded_packet {
- u8 packet_type;
- u8 version; /* batman version field */
- u8 ttl;
- u8 first_ttvn;
- /* u8 first_dest[ETH_ALEN]; - saved in mac header destination */
- u8 first_source[ETH_ALEN];
- u8 first_orig_dest[ETH_ALEN];
- __be32 first_crc;
- u8 second_ttl;
- u8 second_ttvn;
- u8 second_dest[ETH_ALEN];
- u8 second_source[ETH_ALEN];
- u8 second_orig_dest[ETH_ALEN];
- __be32 second_crc;
- __be16 coded_len;
-};
-
-#pragma pack()
-
-/**
- * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload
- * @packet_type: batman-adv packet type, part of the general header
- * @version: batman-adv protocol version, part of the genereal header
- * @ttl: time to live for this packet, part of the genereal header
- * @reserved: reserved field (for packet alignment)
- * @src: address of the source
- * @dst: address of the destination
- * @tvlv_len: length of tvlv data following the unicast tvlv header
- * @align: 2 bytes to align the header to a 4 byte boundary
- */
-struct batadv_unicast_tvlv_packet {
- u8 packet_type;
- u8 version; /* batman version field */
- u8 ttl;
- u8 reserved;
- u8 dst[ETH_ALEN];
- u8 src[ETH_ALEN];
- __be16 tvlv_len;
- u16 align;
-};
-
-/**
- * struct batadv_tvlv_hdr - base tvlv header struct
- * @type: tvlv container type (see batadv_tvlv_type)
- * @version: tvlv container version
- * @len: tvlv container length
- */
-struct batadv_tvlv_hdr {
- u8 type;
- u8 version;
- __be16 len;
-};
-
-/**
- * struct batadv_tvlv_gateway_data - gateway data propagated through gw tvlv
- * container
- * @bandwidth_down: advertised uplink download bandwidth
- * @bandwidth_up: advertised uplink upload bandwidth
- */
-struct batadv_tvlv_gateway_data {
- __be32 bandwidth_down;
- __be32 bandwidth_up;
-};
-
-/**
- * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container
- * @flags: translation table flags (see batadv_tt_data_flags)
- * @ttvn: translation table version number
- * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by
- * one batadv_tvlv_tt_vlan_data object per announced vlan
- */
-struct batadv_tvlv_tt_data {
- u8 flags;
- u8 ttvn;
- __be16 num_vlan;
-};
-
-/**
- * struct batadv_tvlv_tt_vlan_data - vlan specific tt data propagated through
- * the tt tvlv container
- * @crc: crc32 checksum of the entries belonging to this vlan
- * @vid: vlan identifier
- * @reserved: unused, useful for alignment purposes
- */
-struct batadv_tvlv_tt_vlan_data {
- __be32 crc;
- __be16 vid;
- u16 reserved;
-};
-
-/**
- * struct batadv_tvlv_tt_change - translation table diff data
- * @flags: status indicators concerning the non-mesh client (see
- * batadv_tt_client_flags)
- * @reserved: reserved field - useful for alignment purposes only
- * @addr: mac address of non-mesh client that triggered this tt change
- * @vid: VLAN identifier
- */
-struct batadv_tvlv_tt_change {
- u8 flags;
- u8 reserved[3];
- u8 addr[ETH_ALEN];
- __be16 vid;
-};
-
-/**
- * struct batadv_tvlv_roam_adv - roaming advertisement
- * @client: mac address of roaming client
- * @vid: VLAN identifier
- */
-struct batadv_tvlv_roam_adv {
- u8 client[ETH_ALEN];
- __be16 vid;
-};
-
-/**
- * struct batadv_tvlv_mcast_data - payload of a multicast tvlv
- * @flags: multicast flags announced by the orig node
- * @reserved: reserved field
- */
-struct batadv_tvlv_mcast_data {
- u8 flags;
- u8 reserved[3];
-};
-
-#endif /* _NET_BATMAN_ADV_PACKET_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 40d9bf3e5bfe..b6891e8b741c 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -33,6 +34,7 @@
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/stddef.h>
+#include <uapi/linux/batadv_packet.h>
#include "bitarray.h"
#include "bridge_loop_avoidance.h"
@@ -43,7 +45,6 @@
#include "log.h"
#include "network-coding.h"
#include "originator.h"
-#include "packet.h"
#include "send.h"
#include "soft-interface.h"
#include "tp_meter.h"
@@ -54,7 +55,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
/**
- * _batadv_update_route - set the router for this originator
+ * _batadv_update_route() - set the router for this originator
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node which is to be configured
* @recv_if: the receive interface for which this route is set
@@ -118,7 +119,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
}
/**
- * batadv_update_route - set the router for this originator
+ * batadv_update_route() - set the router for this originator
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node which is to be configured
* @recv_if: the receive interface for which this route is set
@@ -145,7 +146,7 @@ out:
}
/**
- * batadv_window_protected - checks whether the host restarted and is in the
+ * batadv_window_protected() - checks whether the host restarted and is in the
* protection time.
* @bat_priv: the bat priv with all the soft interface information
* @seq_num_diff: difference between the current/received sequence number and
@@ -180,6 +181,14 @@ bool batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
return false;
}
+/**
+ * batadv_check_management_packet() - Check preconditions for management packets
+ * @skb: incoming packet buffer
+ * @hard_iface: incoming hard interface
+ * @header_len: minimal header length of packet type
+ *
+ * Return: true when management preconditions are met, false otherwise
+ */
bool batadv_check_management_packet(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface,
int header_len)
@@ -212,7 +221,7 @@ bool batadv_check_management_packet(struct sk_buff *skb,
}
/**
- * batadv_recv_my_icmp_packet - receive an icmp packet locally
+ * batadv_recv_my_icmp_packet() - receive an icmp packet locally
* @bat_priv: the bat priv with all the soft interface information
* @skb: icmp packet to process
*
@@ -347,6 +356,13 @@ out:
return ret;
}
+/**
+ * batadv_recv_icmp_packet() - Process incoming icmp packet
+ * @skb: incoming packet buffer
+ * @recv_if: incoming hard interface
+ *
+ * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure
+ */
int batadv_recv_icmp_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
{
@@ -440,7 +456,7 @@ free_skb:
}
/**
- * batadv_check_unicast_packet - Check for malformed unicast packets
+ * batadv_check_unicast_packet() - Check for malformed unicast packets
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to check
* @hdr_size: size of header to pull
@@ -478,7 +494,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
}
/**
- * batadv_last_bonding_get - Get last_bonding_candidate of orig_node
+ * batadv_last_bonding_get() - Get last_bonding_candidate of orig_node
* @orig_node: originator node whose last bonding candidate should be retrieved
*
* Return: last bonding candidate of router or NULL if not found
@@ -501,7 +517,7 @@ batadv_last_bonding_get(struct batadv_orig_node *orig_node)
}
/**
- * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node
+ * batadv_last_bonding_replace() - Replace last_bonding_candidate of orig_node
* @orig_node: originator node whose bonding candidates should be replaced
* @new_candidate: new bonding candidate or NULL
*/
@@ -524,7 +540,7 @@ batadv_last_bonding_replace(struct batadv_orig_node *orig_node,
}
/**
- * batadv_find_router - find a suitable router for this originator
+ * batadv_find_router() - find a suitable router for this originator
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: the destination node
* @recv_if: pointer to interface this packet was received on
@@ -741,7 +757,7 @@ free_skb:
}
/**
- * batadv_reroute_unicast_packet - update the unicast header for re-routing
+ * batadv_reroute_unicast_packet() - update the unicast header for re-routing
* @bat_priv: the bat priv with all the soft interface information
* @unicast_packet: the unicast header to be updated
* @dst_addr: the payload destination
@@ -904,7 +920,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
}
/**
- * batadv_recv_unhandled_unicast_packet - receive and process packets which
+ * batadv_recv_unhandled_unicast_packet() - receive and process packets which
* are in the unicast number space but not yet known to the implementation
* @skb: unicast tvlv packet to process
* @recv_if: pointer to interface this packet was received on
@@ -935,6 +951,13 @@ free_skb:
return NET_RX_DROP;
}
+/**
+ * batadv_recv_unicast_packet() - Process incoming unicast packet
+ * @skb: incoming packet buffer
+ * @recv_if: incoming hard interface
+ *
+ * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure
+ */
int batadv_recv_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
{
@@ -1036,7 +1059,7 @@ free_skb:
}
/**
- * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets
+ * batadv_recv_unicast_tvlv() - receive and process unicast tvlv packets
* @skb: unicast tvlv packet to process
* @recv_if: pointer to interface this packet was received on
*
@@ -1090,7 +1113,7 @@ free_skb:
}
/**
- * batadv_recv_frag_packet - process received fragment
+ * batadv_recv_frag_packet() - process received fragment
* @skb: the received fragment
* @recv_if: interface that the skb is received on
*
@@ -1155,6 +1178,13 @@ free_skb:
return ret;
}
+/**
+ * batadv_recv_bcast_packet() - Process incoming broadcast packet
+ * @skb: incoming packet buffer
+ * @recv_if: incoming hard interface
+ *
+ * Return: NET_RX_SUCCESS on success or NET_RX_DROP in case of failure
+ */
int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
{
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 5ede16c32f15..a1289bc5f115 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 7895323fd2a7..2a5ab6f1076d 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -23,7 +24,7 @@
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
@@ -54,7 +55,7 @@
static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
/**
- * batadv_send_skb_packet - send an already prepared packet
+ * batadv_send_skb_packet() - send an already prepared packet
* @skb: the packet to send
* @hard_iface: the interface to use to send the broadcast packet
* @dst_addr: the payload destination
@@ -123,12 +124,30 @@ send_skb_err:
return NET_XMIT_DROP;
}
+/**
+ * batadv_send_broadcast_skb() - Send broadcast packet via hard interface
+ * @skb: packet to be transmitted (with batadv header and no outer eth header)
+ * @hard_iface: outgoing interface
+ *
+ * Return: A negative errno code is returned on a failure. A success does not
+ * guarantee the frame will be transmitted as it may be dropped due
+ * to congestion or traffic shaping.
+ */
int batadv_send_broadcast_skb(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface)
{
return batadv_send_skb_packet(skb, hard_iface, batadv_broadcast_addr);
}
+/**
+ * batadv_send_unicast_skb() - Send unicast packet to neighbor
+ * @skb: packet to be transmitted (with batadv header and no outer eth header)
+ * @neigh: neighbor which is used as next hop to destination
+ *
+ * Return: A negative errno code is returned on a failure. A success does not
+ * guarantee the frame will be transmitted as it may be dropped due
+ * to congestion or traffic shaping.
+ */
int batadv_send_unicast_skb(struct sk_buff *skb,
struct batadv_neigh_node *neigh)
{
@@ -153,7 +172,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
}
/**
- * batadv_send_skb_to_orig - Lookup next-hop and transmit skb.
+ * batadv_send_skb_to_orig() - Lookup next-hop and transmit skb.
* @skb: Packet to be transmitted.
* @orig_node: Final destination of the packet.
* @recv_if: Interface used when receiving the packet (can be NULL).
@@ -216,7 +235,7 @@ free_skb:
}
/**
- * batadv_send_skb_push_fill_unicast - extend the buffer and initialize the
+ * batadv_send_skb_push_fill_unicast() - extend the buffer and initialize the
* common fields for unicast packets
* @skb: the skb carrying the unicast header to initialize
* @hdr_size: amount of bytes to push at the beginning of the skb
@@ -249,7 +268,7 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size,
}
/**
- * batadv_send_skb_prepare_unicast - encapsulate an skb with a unicast header
+ * batadv_send_skb_prepare_unicast() - encapsulate an skb with a unicast header
* @skb: the skb containing the payload to encapsulate
* @orig_node: the destination node
*
@@ -264,7 +283,7 @@ static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb,
}
/**
- * batadv_send_skb_prepare_unicast_4addr - encapsulate an skb with a
+ * batadv_send_skb_prepare_unicast_4addr() - encapsulate an skb with a
* unicast 4addr header
* @bat_priv: the bat priv with all the soft interface information
* @skb: the skb containing the payload to encapsulate
@@ -308,7 +327,7 @@ out:
}
/**
- * batadv_send_skb_unicast - encapsulate and send an skb via unicast
+ * batadv_send_skb_unicast() - encapsulate and send an skb via unicast
* @bat_priv: the bat priv with all the soft interface information
* @skb: payload to send
* @packet_type: the batman unicast packet type to use
@@ -378,7 +397,7 @@ out:
}
/**
- * batadv_send_skb_via_tt_generic - send an skb via TT lookup
+ * batadv_send_skb_via_tt_generic() - send an skb via TT lookup
* @bat_priv: the bat priv with all the soft interface information
* @skb: payload to send
* @packet_type: the batman unicast packet type to use
@@ -425,7 +444,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
}
/**
- * batadv_send_skb_via_gw - send an skb via gateway lookup
+ * batadv_send_skb_via_gw() - send an skb via gateway lookup
* @bat_priv: the bat priv with all the soft interface information
* @skb: payload to send
* @vid: the vid to be used to search the translation table
@@ -452,7 +471,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
}
/**
- * batadv_forw_packet_free - free a forwarding packet
+ * batadv_forw_packet_free() - free a forwarding packet
* @forw_packet: The packet to free
* @dropped: whether the packet is freed because is is dropped
*
@@ -477,7 +496,7 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
}
/**
- * batadv_forw_packet_alloc - allocate a forwarding packet
+ * batadv_forw_packet_alloc() - allocate a forwarding packet
* @if_incoming: The (optional) if_incoming to be grabbed
* @if_outgoing: The (optional) if_outgoing to be grabbed
* @queue_left: The (optional) queue counter to decrease
@@ -543,7 +562,7 @@ err:
}
/**
- * batadv_forw_packet_was_stolen - check whether someone stole this packet
+ * batadv_forw_packet_was_stolen() - check whether someone stole this packet
* @forw_packet: the forwarding packet to check
*
* This function checks whether the given forwarding packet was claimed by
@@ -558,7 +577,7 @@ batadv_forw_packet_was_stolen(struct batadv_forw_packet *forw_packet)
}
/**
- * batadv_forw_packet_steal - claim a forw_packet for free()
+ * batadv_forw_packet_steal() - claim a forw_packet for free()
* @forw_packet: the forwarding packet to steal
* @lock: a key to the store to steal from (e.g. forw_{bat,bcast}_list_lock)
*
@@ -589,7 +608,7 @@ bool batadv_forw_packet_steal(struct batadv_forw_packet *forw_packet,
}
/**
- * batadv_forw_packet_list_steal - claim a list of forward packets for free()
+ * batadv_forw_packet_list_steal() - claim a list of forward packets for free()
* @forw_list: the to be stolen forward packets
* @cleanup_list: a backup pointer, to be able to dispose the packet later
* @hard_iface: the interface to steal forward packets from
@@ -625,7 +644,7 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list,
}
/**
- * batadv_forw_packet_list_free - free a list of forward packets
+ * batadv_forw_packet_list_free() - free a list of forward packets
* @head: a list of to be freed forw_packets
*
* This function cancels the scheduling of any packet in the provided list,
@@ -649,7 +668,7 @@ static void batadv_forw_packet_list_free(struct hlist_head *head)
}
/**
- * batadv_forw_packet_queue - try to queue a forwarding packet
+ * batadv_forw_packet_queue() - try to queue a forwarding packet
* @forw_packet: the forwarding packet to queue
* @lock: a key to the store (e.g. forw_{bat,bcast}_list_lock)
* @head: the shelve to queue it on (e.g. forw_{bat,bcast}_list)
@@ -693,7 +712,7 @@ static void batadv_forw_packet_queue(struct batadv_forw_packet *forw_packet,
}
/**
- * batadv_forw_packet_bcast_queue - try to queue a broadcast packet
+ * batadv_forw_packet_bcast_queue() - try to queue a broadcast packet
* @bat_priv: the bat priv with all the soft interface information
* @forw_packet: the forwarding packet to queue
* @send_time: timestamp (jiffies) when the packet is to be sent
@@ -712,7 +731,7 @@ batadv_forw_packet_bcast_queue(struct batadv_priv *bat_priv,
}
/**
- * batadv_forw_packet_ogmv1_queue - try to queue an OGMv1 packet
+ * batadv_forw_packet_ogmv1_queue() - try to queue an OGMv1 packet
* @bat_priv: the bat priv with all the soft interface information
* @forw_packet: the forwarding packet to queue
* @send_time: timestamp (jiffies) when the packet is to be sent
@@ -730,7 +749,7 @@ void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
}
/**
- * batadv_add_bcast_packet_to_list - queue broadcast packet for multiple sends
+ * batadv_add_bcast_packet_to_list() - queue broadcast packet for multiple sends
* @bat_priv: the bat priv with all the soft interface information
* @skb: broadcast packet to add
* @delay: number of jiffies to wait before sending
@@ -790,7 +809,7 @@ err:
}
/**
- * batadv_forw_packet_bcasts_left - check if a retransmission is necessary
+ * batadv_forw_packet_bcasts_left() - check if a retransmission is necessary
* @forw_packet: the forwarding packet to check
* @hard_iface: the interface to check on
*
@@ -818,7 +837,8 @@ batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet,
}
/**
- * batadv_forw_packet_bcasts_inc - increment retransmission counter of a packet
+ * batadv_forw_packet_bcasts_inc() - increment retransmission counter of a
+ * packet
* @forw_packet: the packet to increase the counter for
*/
static void
@@ -828,7 +848,7 @@ batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet)
}
/**
- * batadv_forw_packet_is_rebroadcast - check packet for previous transmissions
+ * batadv_forw_packet_is_rebroadcast() - check packet for previous transmissions
* @forw_packet: the packet to check
*
* Return: True if this packet was transmitted before, false otherwise.
@@ -953,7 +973,7 @@ out:
}
/**
- * batadv_purge_outstanding_packets - stop/purge scheduled bcast/OGMv1 packets
+ * batadv_purge_outstanding_packets() - stop/purge scheduled bcast/OGMv1 packets
* @bat_priv: the bat priv with all the soft interface information
* @hard_iface: the hard interface to cancel and purge bcast/ogm packets on
*
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index a16b34f473ef..1e8c79093623 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -23,8 +24,7 @@
#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/types.h>
-
-#include "packet.h"
+#include <uapi/linux/batadv_packet.h>
struct sk_buff;
@@ -76,7 +76,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid);
/**
- * batadv_send_skb_via_tt - send an skb via TT lookup
+ * batadv_send_skb_via_tt() - send an skb via TT lookup
* @bat_priv: the bat priv with all the soft interface information
* @skb: the payload to send
* @dst_hint: can be used to override the destination contained in the skb
@@ -97,7 +97,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv,
}
/**
- * batadv_send_skb_via_tt_4addr - send an skb via TT lookup
+ * batadv_send_skb_via_tt_4addr() - send an skb via TT lookup
* @bat_priv: the bat priv with all the soft interface information
* @skb: the payload to send
* @packet_subtype: the unicast 4addr packet subtype to use
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 9f673cdfecf8..900c5ce21cd4 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -26,7 +27,7 @@
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/jiffies.h>
@@ -48,6 +49,7 @@
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <uapi/linux/batadv_packet.h>
#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
@@ -59,11 +61,17 @@
#include "multicast.h"
#include "network-coding.h"
#include "originator.h"
-#include "packet.h"
#include "send.h"
#include "sysfs.h"
#include "translation-table.h"
+/**
+ * batadv_skb_head_push() - Increase header size and move (push) head pointer
+ * @skb: packet buffer which should be modified
+ * @len: number of bytes to add
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
{
int result;
@@ -96,7 +104,7 @@ static int batadv_interface_release(struct net_device *dev)
}
/**
- * batadv_sum_counter - Sum the cpu-local counters for index 'idx'
+ * batadv_sum_counter() - Sum the cpu-local counters for index 'idx'
* @bat_priv: the bat priv with all the soft interface information
* @idx: index of counter to sum up
*
@@ -169,7 +177,7 @@ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
}
/**
- * batadv_interface_set_rx_mode - set the rx mode of a device
+ * batadv_interface_set_rx_mode() - set the rx mode of a device
* @dev: registered network device to modify
*
* We do not actually need to set any rx filters for the virtual batman
@@ -389,7 +397,7 @@ end:
}
/**
- * batadv_interface_rx - receive ethernet frame on local batman-adv interface
+ * batadv_interface_rx() - receive ethernet frame on local batman-adv interface
* @soft_iface: local interface which will receive the ethernet frame
* @skb: ethernet frame for @soft_iface
* @hdr_size: size of already parsed batman-adv header
@@ -501,8 +509,8 @@ out:
}
/**
- * batadv_softif_vlan_release - release vlan from lists and queue for free after
- * rcu grace period
+ * batadv_softif_vlan_release() - release vlan from lists and queue for free
+ * after rcu grace period
* @ref: kref pointer of the vlan object
*/
static void batadv_softif_vlan_release(struct kref *ref)
@@ -519,7 +527,7 @@ static void batadv_softif_vlan_release(struct kref *ref)
}
/**
- * batadv_softif_vlan_put - decrease the vlan object refcounter and
+ * batadv_softif_vlan_put() - decrease the vlan object refcounter and
* possibly release it
* @vlan: the vlan object to release
*/
@@ -532,7 +540,7 @@ void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
}
/**
- * batadv_softif_vlan_get - get the vlan object for a specific vid
+ * batadv_softif_vlan_get() - get the vlan object for a specific vid
* @bat_priv: the bat priv with all the soft interface information
* @vid: the identifier of the vlan object to retrieve
*
@@ -561,7 +569,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
}
/**
- * batadv_softif_create_vlan - allocate the needed resources for a new vlan
+ * batadv_softif_create_vlan() - allocate the needed resources for a new vlan
* @bat_priv: the bat priv with all the soft interface information
* @vid: the VLAN identifier
*
@@ -613,7 +621,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
}
/**
- * batadv_softif_destroy_vlan - remove and destroy a softif_vlan object
+ * batadv_softif_destroy_vlan() - remove and destroy a softif_vlan object
* @bat_priv: the bat priv with all the soft interface information
* @vlan: the object to remove
*/
@@ -631,7 +639,7 @@ static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv,
}
/**
- * batadv_interface_add_vid - ndo_add_vid API implementation
+ * batadv_interface_add_vid() - ndo_add_vid API implementation
* @dev: the netdev of the mesh interface
* @proto: protocol of the the vlan id
* @vid: identifier of the new vlan
@@ -689,7 +697,7 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
}
/**
- * batadv_interface_kill_vid - ndo_kill_vid API implementation
+ * batadv_interface_kill_vid() - ndo_kill_vid API implementation
* @dev: the netdev of the mesh interface
* @proto: protocol of the the vlan id
* @vid: identifier of the deleted vlan
@@ -732,7 +740,7 @@ static struct lock_class_key batadv_netdev_xmit_lock_key;
static struct lock_class_key batadv_netdev_addr_lock_key;
/**
- * batadv_set_lockdep_class_one - Set lockdep class for a single tx queue
+ * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue
* @dev: device which owns the tx queue
* @txq: tx queue to modify
* @_unused: always NULL
@@ -745,7 +753,7 @@ static void batadv_set_lockdep_class_one(struct net_device *dev,
}
/**
- * batadv_set_lockdep_class - Set txq and addr_list lockdep class
+ * batadv_set_lockdep_class() - Set txq and addr_list lockdep class
* @dev: network device to modify
*/
static void batadv_set_lockdep_class(struct net_device *dev)
@@ -755,7 +763,7 @@ static void batadv_set_lockdep_class(struct net_device *dev)
}
/**
- * batadv_softif_init_late - late stage initialization of soft interface
+ * batadv_softif_init_late() - late stage initialization of soft interface
* @dev: registered network device to modify
*
* Return: error code on failures
@@ -860,7 +868,7 @@ free_bat_counters:
}
/**
- * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface
+ * batadv_softif_slave_add() - Add a slave interface to a batadv_soft_interface
* @dev: batadv_soft_interface used as master interface
* @slave_dev: net_device which should become the slave interface
* @extack: extended ACK report struct
@@ -888,7 +896,7 @@ out:
}
/**
- * batadv_softif_slave_del - Delete a slave iface from a batadv_soft_interface
+ * batadv_softif_slave_del() - Delete a slave iface from a batadv_soft_interface
* @dev: batadv_soft_interface used as master interface
* @slave_dev: net_device which should be removed from the master interface
*
@@ -1023,7 +1031,7 @@ static const struct ethtool_ops batadv_ethtool_ops = {
};
/**
- * batadv_softif_free - Deconstructor of batadv_soft_interface
+ * batadv_softif_free() - Deconstructor of batadv_soft_interface
* @dev: Device to cleanup and remove
*/
static void batadv_softif_free(struct net_device *dev)
@@ -1039,7 +1047,7 @@ static void batadv_softif_free(struct net_device *dev)
}
/**
- * batadv_softif_init_early - early stage initialization of soft interface
+ * batadv_softif_init_early() - early stage initialization of soft interface
* @dev: registered network device to modify
*/
static void batadv_softif_init_early(struct net_device *dev)
@@ -1063,6 +1071,13 @@ static void batadv_softif_init_early(struct net_device *dev)
dev->ethtool_ops = &batadv_ethtool_ops;
}
+/**
+ * batadv_softif_create() - Create and register soft interface
+ * @net: the applicable net namespace
+ * @name: name of the new soft interface
+ *
+ * Return: newly allocated soft_interface, NULL on errors
+ */
struct net_device *batadv_softif_create(struct net *net, const char *name)
{
struct net_device *soft_iface;
@@ -1089,7 +1104,7 @@ struct net_device *batadv_softif_create(struct net *net, const char *name)
}
/**
- * batadv_softif_destroy_sysfs - deletion of batadv_soft_interface via sysfs
+ * batadv_softif_destroy_sysfs() - deletion of batadv_soft_interface via sysfs
* @soft_iface: the to-be-removed batman-adv interface
*/
void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
@@ -1111,7 +1126,8 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
}
/**
- * batadv_softif_destroy_netlink - deletion of batadv_soft_interface via netlink
+ * batadv_softif_destroy_netlink() - deletion of batadv_soft_interface via
+ * netlink
* @soft_iface: the to-be-removed batman-adv interface
* @head: list pointer
*/
@@ -1139,6 +1155,12 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
unregister_netdevice_queue(soft_iface, head);
}
+/**
+ * batadv_softif_is_valid() - Check whether device is a batadv soft interface
+ * @net_dev: device which should be checked
+ *
+ * Return: true when net_dev is a batman-adv interface, false otherwise
+ */
bool batadv_softif_is_valid(const struct net_device *net_dev)
{
if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx)
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 639c3abb214a..075c5b5b2ce1 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index aa187fd42475..c1578fa0b952 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner
@@ -22,10 +23,11 @@
#include <linux/compiler.h>
#include <linux/device.h>
#include <linux/errno.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/kernel.h>
+#include <linux/kobject.h>
#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
@@ -37,6 +39,7 @@
#include <linux/string.h>
#include <linux/stringify.h>
#include <linux/workqueue.h>
+#include <uapi/linux/batadv_packet.h>
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
@@ -45,7 +48,6 @@
#include "hard-interface.h"
#include "log.h"
#include "network-coding.h"
-#include "packet.h"
#include "soft-interface.h"
static struct net_device *batadv_kobj_to_netdev(struct kobject *obj)
@@ -63,7 +65,7 @@ static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj)
}
/**
- * batadv_vlan_kobj_to_batpriv - convert a vlan kobj in the associated batpriv
+ * batadv_vlan_kobj_to_batpriv() - convert a vlan kobj in the associated batpriv
* @obj: kobject to covert
*
* Return: the associated batadv_priv struct.
@@ -83,7 +85,7 @@ static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj)
}
/**
- * batadv_kobj_to_vlan - convert a kobj in the associated softif_vlan struct
+ * batadv_kobj_to_vlan() - convert a kobj in the associated softif_vlan struct
* @bat_priv: the bat priv with all the soft interface information
* @obj: kobject to covert
*
@@ -598,7 +600,7 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj,
}
/**
- * batadv_show_isolation_mark - print the current isolation mark/mask
+ * batadv_show_isolation_mark() - print the current isolation mark/mask
* @kobj: kobject representing the private mesh sysfs directory
* @attr: the batman-adv attribute the user is interacting with
* @buff: the buffer that will contain the data to send back to the user
@@ -616,8 +618,8 @@ static ssize_t batadv_show_isolation_mark(struct kobject *kobj,
}
/**
- * batadv_store_isolation_mark - parse and store the isolation mark/mask entered
- * by the user
+ * batadv_store_isolation_mark() - parse and store the isolation mark/mask
+ * entered by the user
* @kobj: kobject representing the private mesh sysfs directory
* @attr: the batman-adv attribute the user is interacting with
* @buff: the buffer containing the user data
@@ -733,6 +735,12 @@ static struct batadv_attribute *batadv_vlan_attrs[] = {
NULL,
};
+/**
+ * batadv_sysfs_add_meshif() - Add soft interface specific sysfs entries
+ * @dev: netdev struct of the soft interface
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_sysfs_add_meshif(struct net_device *dev)
{
struct kobject *batif_kobject = &dev->dev.kobj;
@@ -773,6 +781,10 @@ out:
return -ENOMEM;
}
+/**
+ * batadv_sysfs_del_meshif() - Remove soft interface specific sysfs entries
+ * @dev: netdev struct of the soft interface
+ */
void batadv_sysfs_del_meshif(struct net_device *dev)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
@@ -788,7 +800,7 @@ void batadv_sysfs_del_meshif(struct net_device *dev)
}
/**
- * batadv_sysfs_add_vlan - add all the needed sysfs objects for the new vlan
+ * batadv_sysfs_add_vlan() - add all the needed sysfs objects for the new vlan
* @dev: netdev of the mesh interface
* @vlan: private data of the newly added VLAN interface
*
@@ -849,7 +861,7 @@ out:
}
/**
- * batadv_sysfs_del_vlan - remove all the sysfs objects for a given VLAN
+ * batadv_sysfs_del_vlan() - remove all the sysfs objects for a given VLAN
* @bat_priv: the bat priv with all the soft interface information
* @vlan: the private data of the VLAN to destroy
*/
@@ -894,7 +906,7 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj,
}
/**
- * batadv_store_mesh_iface_finish - store new hardif mesh_iface state
+ * batadv_store_mesh_iface_finish() - store new hardif mesh_iface state
* @net_dev: netdevice to add/remove to/from batman-adv soft-interface
* @ifname: name of soft-interface to modify
*
@@ -947,7 +959,7 @@ out:
}
/**
- * batadv_store_mesh_iface_work - store new hardif mesh_iface state
+ * batadv_store_mesh_iface_work() - store new hardif mesh_iface state
* @work: work queue item
*
* Changes the parts of the hard+soft interface which can not be modified under
@@ -1043,7 +1055,7 @@ static ssize_t batadv_show_iface_status(struct kobject *kobj,
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
/**
- * batadv_store_throughput_override - parse and store throughput override
+ * batadv_store_throughput_override() - parse and store throughput override
* entered by the user
* @kobj: kobject representing the private mesh sysfs directory
* @attr: the batman-adv attribute the user is interacting with
@@ -1130,6 +1142,13 @@ static struct batadv_attribute *batadv_batman_attrs[] = {
NULL,
};
+/**
+ * batadv_sysfs_add_hardif() - Add hard interface specific sysfs entries
+ * @hardif_obj: address where to store the pointer to new sysfs folder
+ * @dev: netdev struct of the hard interface
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev)
{
struct kobject *hardif_kobject = &dev->dev.kobj;
@@ -1164,6 +1183,11 @@ out:
return -ENOMEM;
}
+/**
+ * batadv_sysfs_del_hardif() - Remove hard interface specific sysfs entries
+ * @hardif_obj: address to the pointer to which stores batman-adv sysfs folder
+ * of the hard interface
+ */
void batadv_sysfs_del_hardif(struct kobject **hardif_obj)
{
kobject_uevent(*hardif_obj, KOBJ_REMOVE);
@@ -1172,6 +1196,16 @@ void batadv_sysfs_del_hardif(struct kobject **hardif_obj)
*hardif_obj = NULL;
}
+/**
+ * batadv_throw_uevent() - Send an uevent with batman-adv specific env data
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: subsystem type of event. Stored in uevent's BATTYPE
+ * @action: action type of event. Stored in uevent's BATACTION
+ * @data: string with additional information to the event (ignored for
+ * BATADV_UEV_DEL). Stored in uevent's BATDATA
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
enum batadv_uev_action action, const char *data)
{
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index e487412e256b..bbeee61221fa 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner
@@ -35,10 +36,23 @@ struct net_device;
*/
#define BATADV_SYSFS_VLAN_SUBDIR_PREFIX "vlan"
+/**
+ * struct batadv_attribute - sysfs export helper for batman-adv attributes
+ */
struct batadv_attribute {
+ /** @attr: sysfs attribute file */
struct attribute attr;
+
+ /**
+ * @show: function to export the current attribute's content to sysfs
+ */
ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
char *buf);
+
+ /**
+ * @store: function to load new value from character buffer and save it
+ * in batman-adv attribute
+ */
ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
char *buf, size_t count);
};
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 15cd2139381e..8b576712d0c1 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
@@ -19,13 +20,13 @@
#include "main.h"
#include <linux/atomic.h>
-#include <linux/bug.h>
+#include <linux/build_bug.h>
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/jiffies.h>
@@ -48,13 +49,13 @@
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
+#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
#include "hard-interface.h"
#include "log.h"
#include "netlink.h"
#include "originator.h"
-#include "packet.h"
#include "send.h"
/**
@@ -97,7 +98,7 @@
static u8 batadv_tp_prerandom[4096] __read_mostly;
/**
- * batadv_tp_session_cookie - generate session cookie based on session ids
+ * batadv_tp_session_cookie() - generate session cookie based on session ids
* @session: TP session identifier
* @icmp_uid: icmp pseudo uid of the tp session
*
@@ -115,7 +116,7 @@ static u32 batadv_tp_session_cookie(const u8 session[2], u8 icmp_uid)
}
/**
- * batadv_tp_cwnd - compute the new cwnd size
+ * batadv_tp_cwnd() - compute the new cwnd size
* @base: base cwnd size value
* @increment: the value to add to base to get the new size
* @min: minumim cwnd value (usually MSS)
@@ -140,7 +141,7 @@ static u32 batadv_tp_cwnd(u32 base, u32 increment, u32 min)
}
/**
- * batadv_tp_updated_cwnd - update the Congestion Windows
+ * batadv_tp_updated_cwnd() - update the Congestion Windows
* @tp_vars: the private data of the current TP meter session
* @mss: maximum segment size of transmission
*
@@ -176,7 +177,7 @@ static void batadv_tp_update_cwnd(struct batadv_tp_vars *tp_vars, u32 mss)
}
/**
- * batadv_tp_update_rto - calculate new retransmission timeout
+ * batadv_tp_update_rto() - calculate new retransmission timeout
* @tp_vars: the private data of the current TP meter session
* @new_rtt: new roundtrip time in msec
*/
@@ -212,7 +213,7 @@ static void batadv_tp_update_rto(struct batadv_tp_vars *tp_vars,
}
/**
- * batadv_tp_batctl_notify - send client status result to client
+ * batadv_tp_batctl_notify() - send client status result to client
* @reason: reason for tp meter session stop
* @dst: destination of tp_meter session
* @bat_priv: the bat priv with all the soft interface information
@@ -244,7 +245,7 @@ static void batadv_tp_batctl_notify(enum batadv_tp_meter_reason reason,
}
/**
- * batadv_tp_batctl_error_notify - send client error result to client
+ * batadv_tp_batctl_error_notify() - send client error result to client
* @reason: reason for tp meter session stop
* @dst: destination of tp_meter session
* @bat_priv: the bat priv with all the soft interface information
@@ -259,7 +260,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason,
}
/**
- * batadv_tp_list_find - find a tp_vars object in the global list
+ * batadv_tp_list_find() - find a tp_vars object in the global list
* @bat_priv: the bat priv with all the soft interface information
* @dst: the other endpoint MAC address to look for
*
@@ -294,7 +295,8 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
}
/**
- * batadv_tp_list_find_session - find tp_vars session object in the global list
+ * batadv_tp_list_find_session() - find tp_vars session object in the global
+ * list
* @bat_priv: the bat priv with all the soft interface information
* @dst: the other endpoint MAC address to look for
* @session: session identifier
@@ -335,7 +337,7 @@ batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst,
}
/**
- * batadv_tp_vars_release - release batadv_tp_vars from lists and queue for
+ * batadv_tp_vars_release() - release batadv_tp_vars from lists and queue for
* free after rcu grace period
* @ref: kref pointer of the batadv_tp_vars
*/
@@ -360,7 +362,7 @@ static void batadv_tp_vars_release(struct kref *ref)
}
/**
- * batadv_tp_vars_put - decrement the batadv_tp_vars refcounter and possibly
+ * batadv_tp_vars_put() - decrement the batadv_tp_vars refcounter and possibly
* release it
* @tp_vars: the private data of the current TP meter session to be free'd
*/
@@ -370,7 +372,7 @@ static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars)
}
/**
- * batadv_tp_sender_cleanup - cleanup sender data and drop and timer
+ * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer
* @bat_priv: the bat priv with all the soft interface information
* @tp_vars: the private data of the current TP meter session to cleanup
*/
@@ -400,7 +402,7 @@ static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv,
}
/**
- * batadv_tp_sender_end - print info about ended session and inform client
+ * batadv_tp_sender_end() - print info about ended session and inform client
* @bat_priv: the bat priv with all the soft interface information
* @tp_vars: the private data of the current TP meter session
*/
@@ -433,7 +435,7 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv,
}
/**
- * batadv_tp_sender_shutdown - let sender thread/timer stop gracefully
+ * batadv_tp_sender_shutdown() - let sender thread/timer stop gracefully
* @tp_vars: the private data of the current TP meter session
* @reason: reason for tp meter session stop
*/
@@ -447,7 +449,7 @@ static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars,
}
/**
- * batadv_tp_sender_finish - stop sender session after test_length was reached
+ * batadv_tp_sender_finish() - stop sender session after test_length was reached
* @work: delayed work reference of the related tp_vars
*/
static void batadv_tp_sender_finish(struct work_struct *work)
@@ -463,7 +465,7 @@ static void batadv_tp_sender_finish(struct work_struct *work)
}
/**
- * batadv_tp_reset_sender_timer - reschedule the sender timer
+ * batadv_tp_reset_sender_timer() - reschedule the sender timer
* @tp_vars: the private TP meter data for this session
*
* Reschedule the timer using tp_vars->rto as delay
@@ -481,8 +483,8 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
}
/**
- * batadv_tp_sender_timeout - timer that fires in case of packet loss
- * @arg: address of the related tp_vars
+ * batadv_tp_sender_timeout() - timer that fires in case of packet loss
+ * @t: address to timer_list inside tp_vars
*
* If fired it means that there was packet loss.
* Switch to Slow Start, set the ss_threshold to half of the current cwnd and
@@ -531,7 +533,7 @@ static void batadv_tp_sender_timeout(struct timer_list *t)
}
/**
- * batadv_tp_fill_prerandom - Fill buffer with prefetched random bytes
+ * batadv_tp_fill_prerandom() - Fill buffer with prefetched random bytes
* @tp_vars: the private TP meter data for this session
* @buf: Buffer to fill with bytes
* @nbytes: amount of pseudorandom bytes
@@ -563,7 +565,7 @@ static void batadv_tp_fill_prerandom(struct batadv_tp_vars *tp_vars,
}
/**
- * batadv_tp_send_msg - send a single message
+ * batadv_tp_send_msg() - send a single message
* @tp_vars: the private TP meter data for this session
* @src: source mac address
* @orig_node: the originator of the destination
@@ -623,7 +625,7 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src,
}
/**
- * batadv_tp_recv_ack - ACK receiving function
+ * batadv_tp_recv_ack() - ACK receiving function
* @bat_priv: the bat priv with all the soft interface information
* @skb: the buffer containing the received packet
*
@@ -765,7 +767,7 @@ out:
}
/**
- * batadv_tp_avail - check if congestion window is not full
+ * batadv_tp_avail() - check if congestion window is not full
* @tp_vars: the private data of the current TP meter session
* @payload_len: size of the payload of a single message
*
@@ -783,7 +785,7 @@ static bool batadv_tp_avail(struct batadv_tp_vars *tp_vars,
}
/**
- * batadv_tp_wait_available - wait until congestion window becomes free or
+ * batadv_tp_wait_available() - wait until congestion window becomes free or
* timeout is reached
* @tp_vars: the private data of the current TP meter session
* @plen: size of the payload of a single message
@@ -805,7 +807,7 @@ static int batadv_tp_wait_available(struct batadv_tp_vars *tp_vars, size_t plen)
}
/**
- * batadv_tp_send - main sending thread of a tp meter session
+ * batadv_tp_send() - main sending thread of a tp meter session
* @arg: address of the related tp_vars
*
* Return: nothing, this function never returns
@@ -904,7 +906,8 @@ out:
}
/**
- * batadv_tp_start_kthread - start new thread which manages the tp meter sender
+ * batadv_tp_start_kthread() - start new thread which manages the tp meter
+ * sender
* @tp_vars: the private data of the current TP meter session
*/
static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars)
@@ -935,7 +938,7 @@ static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars)
}
/**
- * batadv_tp_start - start a new tp meter session
+ * batadv_tp_start() - start a new tp meter session
* @bat_priv: the bat priv with all the soft interface information
* @dst: the receiver MAC address
* @test_length: test length in milliseconds
@@ -1060,7 +1063,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
}
/**
- * batadv_tp_stop - stop currently running tp meter session
+ * batadv_tp_stop() - stop currently running tp meter session
* @bat_priv: the bat priv with all the soft interface information
* @dst: the receiver MAC address
* @return_value: reason for tp meter session stop
@@ -1092,7 +1095,7 @@ out:
}
/**
- * batadv_tp_reset_receiver_timer - reset the receiver shutdown timer
+ * batadv_tp_reset_receiver_timer() - reset the receiver shutdown timer
* @tp_vars: the private data of the current TP meter session
*
* start the receiver shutdown timer or reset it if already started
@@ -1104,9 +1107,9 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
}
/**
- * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
+ * batadv_tp_receiver_shutdown() - stop a tp meter receiver when timeout is
* reached without received ack
- * @arg: address of the related tp_vars
+ * @t: address to timer_list inside tp_vars
*/
static void batadv_tp_receiver_shutdown(struct timer_list *t)
{
@@ -1149,7 +1152,7 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t)
}
/**
- * batadv_tp_send_ack - send an ACK packet
+ * batadv_tp_send_ack() - send an ACK packet
* @bat_priv: the bat priv with all the soft interface information
* @dst: the mac address of the destination originator
* @seq: the sequence number to ACK
@@ -1221,7 +1224,7 @@ out:
}
/**
- * batadv_tp_handle_out_of_order - store an out of order packet
+ * batadv_tp_handle_out_of_order() - store an out of order packet
* @tp_vars: the private data of the current TP meter session
* @skb: the buffer containing the received packet
*
@@ -1297,7 +1300,7 @@ out:
}
/**
- * batadv_tp_ack_unordered - update number received bytes in current stream
+ * batadv_tp_ack_unordered() - update number received bytes in current stream
* without gaps
* @tp_vars: the private data of the current TP meter session
*/
@@ -1330,7 +1333,7 @@ static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars)
}
/**
- * batadv_tp_init_recv - return matching or create new receiver tp_vars
+ * batadv_tp_init_recv() - return matching or create new receiver tp_vars
* @bat_priv: the bat priv with all the soft interface information
* @icmp: received icmp tp msg
*
@@ -1383,7 +1386,7 @@ out_unlock:
}
/**
- * batadv_tp_recv_msg - process a single data message
+ * batadv_tp_recv_msg() - process a single data message
* @bat_priv: the bat priv with all the soft interface information
* @skb: the buffer containing the received packet
*
@@ -1468,7 +1471,7 @@ out:
}
/**
- * batadv_tp_meter_recv - main TP Meter receiving function
+ * batadv_tp_meter_recv() - main TP Meter receiving function
* @bat_priv: the bat priv with all the soft interface information
* @skb: the buffer containing the received packet
*/
@@ -1494,7 +1497,7 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb)
}
/**
- * batadv_tp_meter_init - initialize global tp_meter structures
+ * batadv_tp_meter_init() - initialize global tp_meter structures
*/
void __init batadv_tp_meter_init(void)
{
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index a8ada5c123bd..c8b8f2cb2c2b 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 8a3ce79b1307..7550a9ccd695 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
@@ -20,14 +21,14 @@
#include <linux/atomic.h>
#include <linux/bitops.h>
-#include <linux/bug.h>
+#include <linux/build_bug.h>
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
#include <linux/crc32c.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/jhash.h>
@@ -36,6 +37,7 @@
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
+#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/rculist.h>
@@ -50,6 +52,7 @@
#include <net/genetlink.h>
#include <net/netlink.h>
#include <net/sock.h>
+#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
#include "bridge_loop_avoidance.h"
@@ -58,7 +61,6 @@
#include "log.h"
#include "netlink.h"
#include "originator.h"
-#include "packet.h"
#include "soft-interface.h"
#include "tvlv.h"
@@ -86,7 +88,7 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
bool roaming);
/**
- * batadv_compare_tt - check if two TT entries are the same
+ * batadv_compare_tt() - check if two TT entries are the same
* @node: the list element pointer of the first TT entry
* @data2: pointer to the tt_common_entry of the second TT entry
*
@@ -105,7 +107,7 @@ static bool batadv_compare_tt(const struct hlist_node *node, const void *data2)
}
/**
- * batadv_choose_tt - return the index of the tt entry in the hash table
+ * batadv_choose_tt() - return the index of the tt entry in the hash table
* @data: pointer to the tt_common_entry object to map
* @size: the size of the hash table
*
@@ -125,7 +127,7 @@ static inline u32 batadv_choose_tt(const void *data, u32 size)
}
/**
- * batadv_tt_hash_find - look for a client in the given hash table
+ * batadv_tt_hash_find() - look for a client in the given hash table
* @hash: the hash table to search
* @addr: the mac address of the client to look for
* @vid: VLAN identifier
@@ -170,7 +172,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr,
}
/**
- * batadv_tt_local_hash_find - search the local table for a given client
+ * batadv_tt_local_hash_find() - search the local table for a given client
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the client to look for
* @vid: VLAN identifier
@@ -195,7 +197,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
}
/**
- * batadv_tt_global_hash_find - search the global table for a given client
+ * batadv_tt_global_hash_find() - search the global table for a given client
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the client to look for
* @vid: VLAN identifier
@@ -220,7 +222,7 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
}
/**
- * batadv_tt_local_entry_free_rcu - free the tt_local_entry
+ * batadv_tt_local_entry_free_rcu() - free the tt_local_entry
* @rcu: rcu pointer of the tt_local_entry
*/
static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu)
@@ -234,7 +236,7 @@ static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu)
}
/**
- * batadv_tt_local_entry_release - release tt_local_entry from lists and queue
+ * batadv_tt_local_entry_release() - release tt_local_entry from lists and queue
* for free after rcu grace period
* @ref: kref pointer of the nc_node
*/
@@ -251,7 +253,7 @@ static void batadv_tt_local_entry_release(struct kref *ref)
}
/**
- * batadv_tt_local_entry_put - decrement the tt_local_entry refcounter and
+ * batadv_tt_local_entry_put() - decrement the tt_local_entry refcounter and
* possibly release it
* @tt_local_entry: tt_local_entry to be free'd
*/
@@ -263,7 +265,7 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
}
/**
- * batadv_tt_global_entry_free_rcu - free the tt_global_entry
+ * batadv_tt_global_entry_free_rcu() - free the tt_global_entry
* @rcu: rcu pointer of the tt_global_entry
*/
static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
@@ -277,8 +279,8 @@ static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
}
/**
- * batadv_tt_global_entry_release - release tt_global_entry from lists and queue
- * for free after rcu grace period
+ * batadv_tt_global_entry_release() - release tt_global_entry from lists and
+ * queue for free after rcu grace period
* @ref: kref pointer of the nc_node
*/
static void batadv_tt_global_entry_release(struct kref *ref)
@@ -294,7 +296,7 @@ static void batadv_tt_global_entry_release(struct kref *ref)
}
/**
- * batadv_tt_global_entry_put - decrement the tt_global_entry refcounter and
+ * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and
* possibly release it
* @tt_global_entry: tt_global_entry to be free'd
*/
@@ -306,7 +308,7 @@ batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
}
/**
- * batadv_tt_global_hash_count - count the number of orig entries
+ * batadv_tt_global_hash_count() - count the number of orig entries
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the client to count entries for
* @vid: VLAN identifier
@@ -331,8 +333,8 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_local_size_mod - change the size by v of the local table identified
- * by vid
+ * batadv_tt_local_size_mod() - change the size by v of the local table
+ * identified by vid
* @bat_priv: the bat priv with all the soft interface information
* @vid: the VLAN identifier of the sub-table to change
* @v: the amount to sum to the local table size
@@ -352,8 +354,8 @@ static void batadv_tt_local_size_mod(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_local_size_inc - increase by one the local table size for the given
- * vid
+ * batadv_tt_local_size_inc() - increase by one the local table size for the
+ * given vid
* @bat_priv: the bat priv with all the soft interface information
* @vid: the VLAN identifier
*/
@@ -364,8 +366,8 @@ static void batadv_tt_local_size_inc(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_local_size_dec - decrease by one the local table size for the given
- * vid
+ * batadv_tt_local_size_dec() - decrease by one the local table size for the
+ * given vid
* @bat_priv: the bat priv with all the soft interface information
* @vid: the VLAN identifier
*/
@@ -376,7 +378,7 @@ static void batadv_tt_local_size_dec(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_global_size_mod - change the size by v of the global table
+ * batadv_tt_global_size_mod() - change the size by v of the global table
* for orig_node identified by vid
* @orig_node: the originator for which the table has to be modified
* @vid: the VLAN identifier
@@ -404,7 +406,7 @@ static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node,
}
/**
- * batadv_tt_global_size_inc - increase by one the global table size for the
+ * batadv_tt_global_size_inc() - increase by one the global table size for the
* given vid
* @orig_node: the originator which global table size has to be decreased
* @vid: the vlan identifier
@@ -416,7 +418,7 @@ static void batadv_tt_global_size_inc(struct batadv_orig_node *orig_node,
}
/**
- * batadv_tt_global_size_dec - decrease by one the global table size for the
+ * batadv_tt_global_size_dec() - decrease by one the global table size for the
* given vid
* @orig_node: the originator which global table size has to be decreased
* @vid: the vlan identifier
@@ -428,7 +430,7 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
}
/**
- * batadv_tt_orig_list_entry_free_rcu - free the orig_entry
+ * batadv_tt_orig_list_entry_free_rcu() - free the orig_entry
* @rcu: rcu pointer of the orig_entry
*/
static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
@@ -441,7 +443,7 @@ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
}
/**
- * batadv_tt_orig_list_entry_release - release tt orig entry from lists and
+ * batadv_tt_orig_list_entry_release() - release tt orig entry from lists and
* queue for free after rcu grace period
* @ref: kref pointer of the tt orig entry
*/
@@ -457,7 +459,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref)
}
/**
- * batadv_tt_orig_list_entry_put - decrement the tt orig entry refcounter and
+ * batadv_tt_orig_list_entry_put() - decrement the tt orig entry refcounter and
* possibly release it
* @orig_entry: tt orig entry to be free'd
*/
@@ -468,7 +470,7 @@ batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry)
}
/**
- * batadv_tt_local_event - store a local TT event (ADD/DEL)
+ * batadv_tt_local_event() - store a local TT event (ADD/DEL)
* @bat_priv: the bat priv with all the soft interface information
* @tt_local_entry: the TT entry involved in the event
* @event_flags: flags to store in the event structure
@@ -543,7 +545,7 @@ unlock:
}
/**
- * batadv_tt_len - compute length in bytes of given number of tt changes
+ * batadv_tt_len() - compute length in bytes of given number of tt changes
* @changes_num: number of tt changes
*
* Return: computed length in bytes.
@@ -554,7 +556,7 @@ static int batadv_tt_len(int changes_num)
}
/**
- * batadv_tt_entries - compute the number of entries fitting in tt_len bytes
+ * batadv_tt_entries() - compute the number of entries fitting in tt_len bytes
* @tt_len: available space
*
* Return: the number of entries.
@@ -565,8 +567,8 @@ static u16 batadv_tt_entries(u16 tt_len)
}
/**
- * batadv_tt_local_table_transmit_size - calculates the local translation table
- * size when transmitted over the air
+ * batadv_tt_local_table_transmit_size() - calculates the local translation
+ * table size when transmitted over the air
* @bat_priv: the bat priv with all the soft interface information
*
* Return: local translation table size in bytes.
@@ -625,7 +627,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_local_add - add a new client to the local table or update an
+ * batadv_tt_local_add() - add a new client to the local table or update an
* existing client
* @soft_iface: netdev struct of the mesh interface
* @addr: the mac address of the client to add
@@ -830,7 +832,7 @@ out:
}
/**
- * batadv_tt_prepare_tvlv_global_data - prepare the TVLV TT header to send
+ * batadv_tt_prepare_tvlv_global_data() - prepare the TVLV TT header to send
* within a TT Response directed to another node
* @orig_node: originator for which the TT data has to be prepared
* @tt_data: uninitialised pointer to the address of the TVLV buffer
@@ -903,8 +905,8 @@ out:
}
/**
- * batadv_tt_prepare_tvlv_local_data - allocate and prepare the TT TVLV for this
- * node
+ * batadv_tt_prepare_tvlv_local_data() - allocate and prepare the TT TVLV for
+ * this node
* @bat_priv: the bat priv with all the soft interface information
* @tt_data: uninitialised pointer to the address of the TVLV buffer
* @tt_change: uninitialised pointer to the address of the area where the TT
@@ -977,8 +979,8 @@ out:
}
/**
- * batadv_tt_tvlv_container_update - update the translation table tvlv container
- * after local tt changes have been committed
+ * batadv_tt_tvlv_container_update() - update the translation table tvlv
+ * container after local tt changes have been committed
* @bat_priv: the bat priv with all the soft interface information
*/
static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
@@ -1053,6 +1055,14 @@ container_register:
}
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+
+/**
+ * batadv_tt_local_seq_print_text() - Print the local tt table in a seq file
+ * @seq: seq file to print on
+ * @offset: not used
+ *
+ * Return: always 0
+ */
int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
{
struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1123,7 +1133,7 @@ out:
#endif
/**
- * batadv_tt_local_dump_entry - Dump one TT local entry into a message
+ * batadv_tt_local_dump_entry() - Dump one TT local entry into a message
* @msg :Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -1179,7 +1189,7 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_tt_local_dump_bucket - Dump one TT local bucket into a message
+ * batadv_tt_local_dump_bucket() - Dump one TT local bucket into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -1216,7 +1226,7 @@ batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_tt_local_dump - Dump TT local entries into a message
+ * batadv_tt_local_dump() - Dump TT local entries into a message
* @msg: Netlink message to dump into
* @cb: Parameters from query
*
@@ -1300,7 +1310,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_local_remove - logically remove an entry from the local table
+ * batadv_tt_local_remove() - logically remove an entry from the local table
* @bat_priv: the bat priv with all the soft interface information
* @addr: the MAC address of the client to remove
* @vid: VLAN identifier
@@ -1362,7 +1372,7 @@ out:
}
/**
- * batadv_tt_local_purge_list - purge inactive tt local entries
+ * batadv_tt_local_purge_list() - purge inactive tt local entries
* @bat_priv: the bat priv with all the soft interface information
* @head: pointer to the list containing the local tt entries
* @timeout: parameter deciding whether a given tt local entry is considered
@@ -1397,7 +1407,7 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_local_purge - purge inactive tt local entries
+ * batadv_tt_local_purge() - purge inactive tt local entries
* @bat_priv: the bat priv with all the soft interface information
* @timeout: parameter deciding whether a given tt local entry is considered
* inactive or not
@@ -1490,7 +1500,7 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv)
}
/**
- * batadv_tt_global_orig_entry_find - find a TT orig_list_entry
+ * batadv_tt_global_orig_entry_find() - find a TT orig_list_entry
* @entry: the TT global entry where the orig_list_entry has to be
* extracted from
* @orig_node: the originator for which the orig_list_entry has to be found
@@ -1524,8 +1534,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
}
/**
- * batadv_tt_global_entry_has_orig - check if a TT global entry is also handled
- * by a given originator
+ * batadv_tt_global_entry_has_orig() - check if a TT global entry is also
+ * handled by a given originator
* @entry: the TT global entry to check
* @orig_node: the originator to search in the list
*
@@ -1550,7 +1560,7 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
}
/**
- * batadv_tt_global_sync_flags - update TT sync flags
+ * batadv_tt_global_sync_flags() - update TT sync flags
* @tt_global: the TT global entry to update sync flags in
*
* Updates the sync flag bits in the tt_global flag attribute with a logical
@@ -1574,7 +1584,7 @@ batadv_tt_global_sync_flags(struct batadv_tt_global_entry *tt_global)
}
/**
- * batadv_tt_global_orig_entry_add - add or update a TT orig entry
+ * batadv_tt_global_orig_entry_add() - add or update a TT orig entry
* @tt_global: the TT global entry to add an orig entry in
* @orig_node: the originator to add an orig entry for
* @ttvn: translation table version number of this changeset
@@ -1624,7 +1634,7 @@ out:
}
/**
- * batadv_tt_global_add - add a new TT global entry or update an existing one
+ * batadv_tt_global_add() - add a new TT global entry or update an existing one
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: the originator announcing the client
* @tt_addr: the mac address of the non-mesh client
@@ -1796,7 +1806,7 @@ out:
}
/**
- * batadv_transtable_best_orig - Get best originator list entry from tt entry
+ * batadv_transtable_best_orig() - Get best originator list entry from tt entry
* @bat_priv: the bat priv with all the soft interface information
* @tt_global_entry: global translation table entry to be analyzed
*
@@ -1842,8 +1852,8 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
- * batadv_tt_global_print_entry - print all orig nodes who announce the address
- * for this global entry
+ * batadv_tt_global_print_entry() - print all orig nodes who announce the
+ * address for this global entry
* @bat_priv: the bat priv with all the soft interface information
* @tt_global_entry: global translation table entry to be printed
* @seq: debugfs table seq_file struct
@@ -1925,6 +1935,13 @@ print_list:
}
}
+/**
+ * batadv_tt_global_seq_print_text() - Print the global tt table in a seq file
+ * @seq: seq file to print on
+ * @offset: not used
+ *
+ * Return: always 0
+ */
int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
{
struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1967,7 +1984,7 @@ out:
#endif
/**
- * batadv_tt_global_dump_subentry - Dump all TT local entries into a message
+ * batadv_tt_global_dump_subentry() - Dump all TT local entries into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -2028,7 +2045,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_tt_global_dump_entry - Dump one TT global entry into a message
+ * batadv_tt_global_dump_entry() - Dump one TT global entry into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -2073,7 +2090,7 @@ batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_tt_global_dump_bucket - Dump one TT local bucket into a message
+ * batadv_tt_global_dump_bucket() - Dump one TT local bucket into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
* @seq: Sequence number of netlink message
@@ -2112,7 +2129,7 @@ batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
}
/**
- * batadv_tt_global_dump - Dump TT global entries into a message
+ * batadv_tt_global_dump() - Dump TT global entries into a message
* @msg: Netlink message to dump into
* @cb: Parameters from query
*
@@ -2180,7 +2197,7 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
}
/**
- * _batadv_tt_global_del_orig_entry - remove and free an orig_entry
+ * _batadv_tt_global_del_orig_entry() - remove and free an orig_entry
* @tt_global_entry: the global entry to remove the orig_entry from
* @orig_entry: the orig entry to remove and free
*
@@ -2222,7 +2239,7 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry)
}
/**
- * batadv_tt_global_del_orig_node - remove orig_node from a global tt entry
+ * batadv_tt_global_del_orig_node() - remove orig_node from a global tt entry
* @bat_priv: the bat priv with all the soft interface information
* @tt_global_entry: the global entry to remove the orig_node from
* @orig_node: the originator announcing the client
@@ -2301,7 +2318,7 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_global_del - remove a client from the global table
+ * batadv_tt_global_del() - remove a client from the global table
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: an originator serving this client
* @addr: the mac address of the client
@@ -2367,8 +2384,8 @@ out:
}
/**
- * batadv_tt_global_del_orig - remove all the TT global entries belonging to the
- * given originator matching the provided vid
+ * batadv_tt_global_del_orig() - remove all the TT global entries belonging to
+ * the given originator matching the provided vid
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: the originator owning the entries to remove
* @match_vid: the VLAN identifier to match. If negative all the entries will be
@@ -2539,7 +2556,7 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry,
}
/**
- * batadv_transtable_search - get the mesh destination for a given client
+ * batadv_transtable_search() - get the mesh destination for a given client
* @bat_priv: the bat priv with all the soft interface information
* @src: mac address of the source client
* @addr: mac address of the destination client
@@ -2599,7 +2616,7 @@ out:
}
/**
- * batadv_tt_global_crc - calculates the checksum of the local table belonging
+ * batadv_tt_global_crc() - calculates the checksum of the local table belonging
* to the given orig_node
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: originator for which the CRC should be computed
@@ -2694,7 +2711,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_local_crc - calculates the checksum of the local table
+ * batadv_tt_local_crc() - calculates the checksum of the local table
* @bat_priv: the bat priv with all the soft interface information
* @vid: VLAN identifier for which the CRC32 has to be computed
*
@@ -2751,7 +2768,7 @@ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_req_node_release - free tt_req node entry
+ * batadv_tt_req_node_release() - free tt_req node entry
* @ref: kref pointer of the tt req_node entry
*/
static void batadv_tt_req_node_release(struct kref *ref)
@@ -2764,7 +2781,7 @@ static void batadv_tt_req_node_release(struct kref *ref)
}
/**
- * batadv_tt_req_node_put - decrement the tt_req_node refcounter and
+ * batadv_tt_req_node_put() - decrement the tt_req_node refcounter and
* possibly release it
* @tt_req_node: tt_req_node to be free'd
*/
@@ -2826,7 +2843,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv)
}
/**
- * batadv_tt_req_node_new - search and possibly create a tt_req_node object
+ * batadv_tt_req_node_new() - search and possibly create a tt_req_node object
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node this request is being issued for
*
@@ -2863,7 +2880,7 @@ unlock:
}
/**
- * batadv_tt_local_valid - verify that given tt entry is a valid one
+ * batadv_tt_local_valid() - verify that given tt entry is a valid one
* @entry_ptr: to be checked local tt entry
* @data_ptr: not used but definition required to satisfy the callback prototype
*
@@ -2897,7 +2914,7 @@ static bool batadv_tt_global_valid(const void *entry_ptr,
}
/**
- * batadv_tt_tvlv_generate - fill the tvlv buff with the tt entries from the
+ * batadv_tt_tvlv_generate() - fill the tvlv buff with the tt entries from the
* specified tt hash
* @bat_priv: the bat priv with all the soft interface information
* @hash: hash table containing the tt entries
@@ -2948,7 +2965,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_global_check_crc - check if all the CRCs are correct
+ * batadv_tt_global_check_crc() - check if all the CRCs are correct
* @orig_node: originator for which the CRCs have to be checked
* @tt_vlan: pointer to the first tvlv VLAN entry
* @num_vlan: number of tvlv VLAN entries
@@ -3005,7 +3022,7 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
}
/**
- * batadv_tt_local_update_crc - update all the local CRCs
+ * batadv_tt_local_update_crc() - update all the local CRCs
* @bat_priv: the bat priv with all the soft interface information
*/
static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv)
@@ -3021,7 +3038,7 @@ static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv)
}
/**
- * batadv_tt_global_update_crc - update all the global CRCs for this orig_node
+ * batadv_tt_global_update_crc() - update all the global CRCs for this orig_node
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: the orig_node for which the CRCs have to be updated
*/
@@ -3048,7 +3065,7 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv,
}
/**
- * batadv_send_tt_request - send a TT Request message to a given node
+ * batadv_send_tt_request() - send a TT Request message to a given node
* @bat_priv: the bat priv with all the soft interface information
* @dst_orig_node: the destination of the message
* @ttvn: the version number that the source of the message is looking for
@@ -3137,7 +3154,7 @@ out:
}
/**
- * batadv_send_other_tt_response - send reply to tt request concerning another
+ * batadv_send_other_tt_response() - send reply to tt request concerning another
* node's translation table
* @bat_priv: the bat priv with all the soft interface information
* @tt_data: tt data containing the tt request information
@@ -3270,8 +3287,8 @@ out:
}
/**
- * batadv_send_my_tt_response - send reply to tt request concerning this node's
- * translation table
+ * batadv_send_my_tt_response() - send reply to tt request concerning this
+ * node's translation table
* @bat_priv: the bat priv with all the soft interface information
* @tt_data: tt data containing the tt request information
* @req_src: mac address of tt request sender
@@ -3388,7 +3405,7 @@ out:
}
/**
- * batadv_send_tt_response - send reply to tt request
+ * batadv_send_tt_response() - send reply to tt request
* @bat_priv: the bat priv with all the soft interface information
* @tt_data: tt data containing the tt request information
* @req_src: mac address of tt request sender
@@ -3484,7 +3501,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
}
/**
- * batadv_is_my_client - check if a client is served by the local node
+ * batadv_is_my_client() - check if a client is served by the local node
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the client to check
* @vid: VLAN identifier
@@ -3514,7 +3531,7 @@ out:
}
/**
- * batadv_handle_tt_response - process incoming tt reply
+ * batadv_handle_tt_response() - process incoming tt reply
* @bat_priv: the bat priv with all the soft interface information
* @tt_data: tt data containing the tt request information
* @resp_src: mac address of tt reply sender
@@ -3607,7 +3624,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv)
}
/**
- * batadv_tt_check_roam_count - check if a client has roamed too frequently
+ * batadv_tt_check_roam_count() - check if a client has roamed too frequently
* @bat_priv: the bat priv with all the soft interface information
* @client: mac address of the roaming client
*
@@ -3662,7 +3679,7 @@ unlock:
}
/**
- * batadv_send_roam_adv - send a roaming advertisement message
+ * batadv_send_roam_adv() - send a roaming advertisement message
* @bat_priv: the bat priv with all the soft interface information
* @client: mac address of the roaming client
* @vid: VLAN identifier
@@ -3727,6 +3744,10 @@ static void batadv_tt_purge(struct work_struct *work)
msecs_to_jiffies(BATADV_TT_WORK_PERIOD));
}
+/**
+ * batadv_tt_free() - Free translation table of soft interface
+ * @bat_priv: the bat priv with all the soft interface information
+ */
void batadv_tt_free(struct batadv_priv *bat_priv)
{
batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_TT, 1);
@@ -3744,7 +3765,7 @@ void batadv_tt_free(struct batadv_priv *bat_priv)
}
/**
- * batadv_tt_local_set_flags - set or unset the specified flags on the local
+ * batadv_tt_local_set_flags() - set or unset the specified flags on the local
* table and possibly count them in the TT size
* @bat_priv: the bat priv with all the soft interface information
* @flags: the flag to switch
@@ -3830,7 +3851,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
}
/**
- * batadv_tt_local_commit_changes_nolock - commit all pending local tt changes
+ * batadv_tt_local_commit_changes_nolock() - commit all pending local tt changes
* which have been queued in the time since the last commit
* @bat_priv: the bat priv with all the soft interface information
*
@@ -3863,7 +3884,7 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv)
}
/**
- * batadv_tt_local_commit_changes - commit all pending local tt changes which
+ * batadv_tt_local_commit_changes() - commit all pending local tt changes which
* have been queued in the time since the last commit
* @bat_priv: the bat priv with all the soft interface information
*/
@@ -3874,6 +3895,15 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
spin_unlock_bh(&bat_priv->tt.commit_lock);
}
+/**
+ * batadv_is_ap_isolated() - Check if packet from upper layer should be dropped
+ * @bat_priv: the bat priv with all the soft interface information
+ * @src: source mac address of packet
+ * @dst: destination mac address of packet
+ * @vid: vlan id of packet
+ *
+ * Return: true when src+dst(+vid) pair should be isolated, false otherwise
+ */
bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
unsigned short vid)
{
@@ -3909,7 +3939,7 @@ vlan_put:
}
/**
- * batadv_tt_update_orig - update global translation table with new tt
+ * batadv_tt_update_orig() - update global translation table with new tt
* information received via ogms
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: the orig_node of the ogm
@@ -3994,7 +4024,7 @@ request_table:
}
/**
- * batadv_tt_global_client_is_roaming - check if a client is marked as roaming
+ * batadv_tt_global_client_is_roaming() - check if a client is marked as roaming
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the client to check
* @vid: VLAN identifier
@@ -4020,7 +4050,7 @@ out:
}
/**
- * batadv_tt_local_client_is_roaming - tells whether the client is roaming
+ * batadv_tt_local_client_is_roaming() - tells whether the client is roaming
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the local client to query
* @vid: VLAN identifier
@@ -4045,6 +4075,15 @@ out:
return ret;
}
+/**
+ * batadv_tt_add_temporary_global_entry() - Add temporary entry to global TT
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: orig node which the temporary entry should be associated with
+ * @addr: mac address of the client
+ * @vid: VLAN id of the new temporary global translation table
+ *
+ * Return: true when temporary tt entry could be added, false otherwise
+ */
bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
const unsigned char *addr,
@@ -4069,7 +4108,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_local_resize_to_mtu - resize the local translation table fit the
+ * batadv_tt_local_resize_to_mtu() - resize the local translation table fit the
* maximum packet size that can be transported through the mesh
* @soft_iface: netdev struct of the mesh interface
*
@@ -4110,7 +4149,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface)
}
/**
- * batadv_tt_tvlv_ogm_handler_v1 - process incoming tt tvlv container
+ * batadv_tt_tvlv_ogm_handler_v1() - process incoming tt tvlv container
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
* @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
@@ -4149,7 +4188,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_tvlv_unicast_handler_v1 - process incoming (unicast) tt tvlv
+ * batadv_tt_tvlv_unicast_handler_v1() - process incoming (unicast) tt tvlv
* container
* @bat_priv: the bat priv with all the soft interface information
* @src: mac address of tt tvlv sender
@@ -4231,7 +4270,8 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
}
/**
- * batadv_roam_tvlv_unicast_handler_v1 - process incoming tt roam tvlv container
+ * batadv_roam_tvlv_unicast_handler_v1() - process incoming tt roam tvlv
+ * container
* @bat_priv: the bat priv with all the soft interface information
* @src: mac address of tt tvlv sender
* @dst: mac address of tt tvlv recipient
@@ -4281,7 +4321,7 @@ out:
}
/**
- * batadv_tt_init - initialise the translation table internals
+ * batadv_tt_init() - initialise the translation table internals
* @bat_priv: the bat priv with all the soft interface information
*
* Return: 0 on success or negative error number in case of failure.
@@ -4317,7 +4357,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
}
/**
- * batadv_tt_global_is_isolated - check if a client is marked as isolated
+ * batadv_tt_global_is_isolated() - check if a client is marked as isolated
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the client
* @vid: the identifier of the VLAN where this client is connected
@@ -4343,7 +4383,7 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
}
/**
- * batadv_tt_cache_init - Initialize tt memory object cache
+ * batadv_tt_cache_init() - Initialize tt memory object cache
*
* Return: 0 on success or negative error number in case of failure.
*/
@@ -4412,7 +4452,7 @@ err_tt_tl_destroy:
}
/**
- * batadv_tt_cache_destroy - Destroy tt memory object cache
+ * batadv_tt_cache_destroy() - Destroy tt memory object cache
*/
void batadv_tt_cache_destroy(void)
{
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 411d586191da..8d9e3abec2c8 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 1d9e267caec9..5ffcb45ac6ff 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -19,7 +20,7 @@
#include <linux/byteorder/generic.h>
#include <linux/etherdevice.h>
-#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/kernel.h>
#include <linux/kref.h>
@@ -35,14 +36,14 @@
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <uapi/linux/batadv_packet.h>
#include "originator.h"
-#include "packet.h"
#include "send.h"
#include "tvlv.h"
/**
- * batadv_tvlv_handler_release - release tvlv handler from lists and queue for
+ * batadv_tvlv_handler_release() - release tvlv handler from lists and queue for
* free after rcu grace period
* @ref: kref pointer of the tvlv
*/
@@ -55,7 +56,7 @@ static void batadv_tvlv_handler_release(struct kref *ref)
}
/**
- * batadv_tvlv_handler_put - decrement the tvlv container refcounter and
+ * batadv_tvlv_handler_put() - decrement the tvlv container refcounter and
* possibly release it
* @tvlv_handler: the tvlv handler to free
*/
@@ -65,7 +66,7 @@ static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
}
/**
- * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list
+ * batadv_tvlv_handler_get() - retrieve tvlv handler from the tvlv handler list
* based on the provided type and version (both need to match)
* @bat_priv: the bat priv with all the soft interface information
* @type: tvlv handler type to look for
@@ -99,7 +100,7 @@ batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
}
/**
- * batadv_tvlv_container_release - release tvlv from lists and free
+ * batadv_tvlv_container_release() - release tvlv from lists and free
* @ref: kref pointer of the tvlv
*/
static void batadv_tvlv_container_release(struct kref *ref)
@@ -111,7 +112,7 @@ static void batadv_tvlv_container_release(struct kref *ref)
}
/**
- * batadv_tvlv_container_put - decrement the tvlv container refcounter and
+ * batadv_tvlv_container_put() - decrement the tvlv container refcounter and
* possibly release it
* @tvlv: the tvlv container to free
*/
@@ -121,7 +122,7 @@ static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
}
/**
- * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container
+ * batadv_tvlv_container_get() - retrieve tvlv container from the tvlv container
* list based on the provided type and version (both need to match)
* @bat_priv: the bat priv with all the soft interface information
* @type: tvlv container type to look for
@@ -155,7 +156,7 @@ batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
}
/**
- * batadv_tvlv_container_list_size - calculate the size of the tvlv container
+ * batadv_tvlv_container_list_size() - calculate the size of the tvlv container
* list entries
* @bat_priv: the bat priv with all the soft interface information
*
@@ -180,8 +181,8 @@ static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
}
/**
- * batadv_tvlv_container_remove - remove tvlv container from the tvlv container
- * list
+ * batadv_tvlv_container_remove() - remove tvlv container from the tvlv
+ * container list
* @bat_priv: the bat priv with all the soft interface information
* @tvlv: the to be removed tvlv container
*
@@ -204,7 +205,7 @@ static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
}
/**
- * batadv_tvlv_container_unregister - unregister tvlv container based on the
+ * batadv_tvlv_container_unregister() - unregister tvlv container based on the
* provided type and version (both need to match)
* @bat_priv: the bat priv with all the soft interface information
* @type: tvlv container type to unregister
@@ -222,7 +223,7 @@ void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
}
/**
- * batadv_tvlv_container_register - register tvlv type, version and content
+ * batadv_tvlv_container_register() - register tvlv type, version and content
* to be propagated with each (primary interface) OGM
* @bat_priv: the bat priv with all the soft interface information
* @type: tvlv container type
@@ -267,7 +268,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
}
/**
- * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate
+ * batadv_tvlv_realloc_packet_buff() - reallocate packet buffer to accommodate
* requested packet size
* @packet_buff: packet buffer
* @packet_buff_len: packet buffer size
@@ -300,7 +301,7 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
}
/**
- * batadv_tvlv_container_ogm_append - append tvlv container content to given
+ * batadv_tvlv_container_ogm_append() - append tvlv container content to given
* OGM packet buffer
* @bat_priv: the bat priv with all the soft interface information
* @packet_buff: ogm packet buffer
@@ -353,7 +354,7 @@ end:
}
/**
- * batadv_tvlv_call_handler - parse the given tvlv buffer to call the
+ * batadv_tvlv_call_handler() - parse the given tvlv buffer to call the
* appropriate handlers
* @bat_priv: the bat priv with all the soft interface information
* @tvlv_handler: tvlv callback function handling the tvlv content
@@ -407,7 +408,7 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
}
/**
- * batadv_tvlv_containers_process - parse the given tvlv buffer to call the
+ * batadv_tvlv_containers_process() - parse the given tvlv buffer to call the
* appropriate handlers
* @bat_priv: the bat priv with all the soft interface information
* @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
@@ -474,7 +475,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
}
/**
- * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate
+ * batadv_tvlv_ogm_receive() - process an incoming ogm and call the appropriate
* handlers
* @bat_priv: the bat priv with all the soft interface information
* @batadv_ogm_packet: ogm packet containing the tvlv containers
@@ -501,7 +502,7 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
}
/**
- * batadv_tvlv_handler_register - register tvlv handler based on the provided
+ * batadv_tvlv_handler_register() - register tvlv handler based on the provided
* type and version (both need to match) for ogm tvlv payload and/or unicast
* payload
* @bat_priv: the bat priv with all the soft interface information
@@ -556,7 +557,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
}
/**
- * batadv_tvlv_handler_unregister - unregister tvlv handler based on the
+ * batadv_tvlv_handler_unregister() - unregister tvlv handler based on the
* provided type and version (both need to match)
* @bat_priv: the bat priv with all the soft interface information
* @type: tvlv handler type to be unregistered
@@ -579,7 +580,7 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
}
/**
- * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the
+ * batadv_tvlv_unicast_send() - send a unicast packet with tvlv payload to the
* specified host
* @bat_priv: the bat priv with all the soft interface information
* @src: source mac address of the unicast packet
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index 4d01400ada30..a74df33f446d 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index a62795868794..bb1578410e0c 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
@@ -34,10 +35,9 @@
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
+#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
-#include "packet.h"
-
struct seq_file;
#ifdef CONFIG_BATMAN_ADV_DAT
@@ -54,13 +54,15 @@ struct seq_file;
/**
* enum batadv_dhcp_recipient - dhcp destination
- * @BATADV_DHCP_NO: packet is not a dhcp message
- * @BATADV_DHCP_TO_SERVER: dhcp message is directed to a server
- * @BATADV_DHCP_TO_CLIENT: dhcp message is directed to a client
*/
enum batadv_dhcp_recipient {
+ /** @BATADV_DHCP_NO: packet is not a dhcp message */
BATADV_DHCP_NO = 0,
+
+ /** @BATADV_DHCP_TO_SERVER: dhcp message is directed to a server */
BATADV_DHCP_TO_SERVER,
+
+ /** @BATADV_DHCP_TO_CLIENT: dhcp message is directed to a client */
BATADV_DHCP_TO_CLIENT,
};
@@ -78,196 +80,274 @@ enum batadv_dhcp_recipient {
/**
* struct batadv_hard_iface_bat_iv - per hard-interface B.A.T.M.A.N. IV data
- * @ogm_buff: buffer holding the OGM packet
- * @ogm_buff_len: length of the OGM packet buffer
- * @ogm_seqno: OGM sequence number - used to identify each OGM
*/
struct batadv_hard_iface_bat_iv {
+ /** @ogm_buff: buffer holding the OGM packet */
unsigned char *ogm_buff;
+
+ /** @ogm_buff_len: length of the OGM packet buffer */
int ogm_buff_len;
+
+ /** @ogm_seqno: OGM sequence number - used to identify each OGM */
atomic_t ogm_seqno;
};
/**
* enum batadv_v_hard_iface_flags - interface flags useful to B.A.T.M.A.N. V
- * @BATADV_FULL_DUPLEX: tells if the connection over this link is full-duplex
- * @BATADV_WARNING_DEFAULT: tells whether we have warned the user that no
- * throughput data is available for this interface and that default values are
- * assumed.
*/
enum batadv_v_hard_iface_flags {
+ /**
+ * @BATADV_FULL_DUPLEX: tells if the connection over this link is
+ * full-duplex
+ */
BATADV_FULL_DUPLEX = BIT(0),
+
+ /**
+ * @BATADV_WARNING_DEFAULT: tells whether we have warned the user that
+ * no throughput data is available for this interface and that default
+ * values are assumed.
+ */
BATADV_WARNING_DEFAULT = BIT(1),
};
/**
* struct batadv_hard_iface_bat_v - per hard-interface B.A.T.M.A.N. V data
- * @elp_interval: time interval between two ELP transmissions
- * @elp_seqno: current ELP sequence number
- * @elp_skb: base skb containing the ELP message to send
- * @elp_wq: workqueue used to schedule ELP transmissions
- * @throughput_override: throughput override to disable link auto-detection
- * @flags: interface specific flags
*/
struct batadv_hard_iface_bat_v {
+ /** @elp_interval: time interval between two ELP transmissions */
atomic_t elp_interval;
+
+ /** @elp_seqno: current ELP sequence number */
atomic_t elp_seqno;
+
+ /** @elp_skb: base skb containing the ELP message to send */
struct sk_buff *elp_skb;
+
+ /** @elp_wq: workqueue used to schedule ELP transmissions */
struct delayed_work elp_wq;
+
+ /**
+ * @throughput_override: throughput override to disable link
+ * auto-detection
+ */
atomic_t throughput_override;
+
+ /** @flags: interface specific flags */
u8 flags;
};
/**
* enum batadv_hard_iface_wifi_flags - Flags describing the wifi configuration
* of a batadv_hard_iface
- * @BATADV_HARDIF_WIFI_WEXT_DIRECT: it is a wext wifi device
- * @BATADV_HARDIF_WIFI_CFG80211_DIRECT: it is a cfg80211 wifi device
- * @BATADV_HARDIF_WIFI_WEXT_INDIRECT: link device is a wext wifi device
- * @BATADV_HARDIF_WIFI_CFG80211_INDIRECT: link device is a cfg80211 wifi device
*/
enum batadv_hard_iface_wifi_flags {
+ /** @BATADV_HARDIF_WIFI_WEXT_DIRECT: it is a wext wifi device */
BATADV_HARDIF_WIFI_WEXT_DIRECT = BIT(0),
+
+ /** @BATADV_HARDIF_WIFI_CFG80211_DIRECT: it is a cfg80211 wifi device */
BATADV_HARDIF_WIFI_CFG80211_DIRECT = BIT(1),
+
+ /**
+ * @BATADV_HARDIF_WIFI_WEXT_INDIRECT: link device is a wext wifi device
+ */
BATADV_HARDIF_WIFI_WEXT_INDIRECT = BIT(2),
+
+ /**
+ * @BATADV_HARDIF_WIFI_CFG80211_INDIRECT: link device is a cfg80211 wifi
+ * device
+ */
BATADV_HARDIF_WIFI_CFG80211_INDIRECT = BIT(3),
};
/**
* struct batadv_hard_iface - network device known to batman-adv
- * @list: list node for batadv_hardif_list
- * @if_num: identificator of the interface
- * @if_status: status of the interface for batman-adv
- * @num_bcasts: number of payload re-broadcasts on this interface (ARQ)
- * @wifi_flags: flags whether this is (directly or indirectly) a wifi interface
- * @net_dev: pointer to the net_device
- * @hardif_obj: kobject of the per interface sysfs "mesh" directory
- * @refcount: number of contexts the object is used
- * @batman_adv_ptype: packet type describing packets that should be processed by
- * batman-adv for this interface
- * @soft_iface: the batman-adv interface which uses this network interface
- * @rcu: struct used for freeing in an RCU-safe manner
- * @bat_iv: per hard-interface B.A.T.M.A.N. IV data
- * @bat_v: per hard-interface B.A.T.M.A.N. V data
- * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
- * @neigh_list: list of unique single hop neighbors via this interface
- * @neigh_list_lock: lock protecting neigh_list
*/
struct batadv_hard_iface {
+ /** @list: list node for batadv_hardif_list */
struct list_head list;
+
+ /** @if_num: identificator of the interface */
s16 if_num;
+
+ /** @if_status: status of the interface for batman-adv */
char if_status;
+
+ /**
+ * @num_bcasts: number of payload re-broadcasts on this interface (ARQ)
+ */
u8 num_bcasts;
+
+ /**
+ * @wifi_flags: flags whether this is (directly or indirectly) a wifi
+ * interface
+ */
u32 wifi_flags;
+
+ /** @net_dev: pointer to the net_device */
struct net_device *net_dev;
+
+ /** @hardif_obj: kobject of the per interface sysfs "mesh" directory */
struct kobject *hardif_obj;
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
+
+ /**
+ * @batman_adv_ptype: packet type describing packets that should be
+ * processed by batman-adv for this interface
+ */
struct packet_type batman_adv_ptype;
+
+ /**
+ * @soft_iface: the batman-adv interface which uses this network
+ * interface
+ */
struct net_device *soft_iface;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
+
+ /** @bat_iv: per hard-interface B.A.T.M.A.N. IV data */
struct batadv_hard_iface_bat_iv bat_iv;
+
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+ /** @bat_v: per hard-interface B.A.T.M.A.N. V data */
struct batadv_hard_iface_bat_v bat_v;
#endif
+
+ /**
+ * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
+ */
struct dentry *debug_dir;
+
+ /**
+ * @neigh_list: list of unique single hop neighbors via this interface
+ */
struct hlist_head neigh_list;
- /* neigh_list_lock protects: neigh_list */
+
+ /** @neigh_list_lock: lock protecting neigh_list */
spinlock_t neigh_list_lock;
};
/**
* struct batadv_orig_ifinfo - originator info per outgoing interface
- * @list: list node for orig_node::ifinfo_list
- * @if_outgoing: pointer to outgoing hard-interface
- * @router: router that should be used to reach this originator
- * @last_real_seqno: last and best known sequence number
- * @last_ttl: ttl of last received packet
- * @last_seqno_forwarded: seqno of the OGM which was forwarded last
- * @batman_seqno_reset: time when the batman seqno window was reset
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_orig_ifinfo {
+ /** @list: list node for &batadv_orig_node.ifinfo_list */
struct hlist_node list;
+
+ /** @if_outgoing: pointer to outgoing hard-interface */
struct batadv_hard_iface *if_outgoing;
- struct batadv_neigh_node __rcu *router; /* rcu protected pointer */
+
+ /** @router: router that should be used to reach this originator */
+ struct batadv_neigh_node __rcu *router;
+
+ /** @last_real_seqno: last and best known sequence number */
u32 last_real_seqno;
+
+ /** @last_ttl: ttl of last received packet */
u8 last_ttl;
+
+ /** @last_seqno_forwarded: seqno of the OGM which was forwarded last */
u32 last_seqno_forwarded;
+
+ /** @batman_seqno_reset: time when the batman seqno window was reset */
unsigned long batman_seqno_reset;
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
};
/**
* struct batadv_frag_table_entry - head in the fragment buffer table
- * @fragment_list: head of list with fragments
- * @lock: lock to protect the list of fragments
- * @timestamp: time (jiffie) of last received fragment
- * @seqno: sequence number of the fragments in the list
- * @size: accumulated size of packets in list
- * @total_size: expected size of the assembled packet
*/
struct batadv_frag_table_entry {
+ /** @fragment_list: head of list with fragments */
struct hlist_head fragment_list;
- spinlock_t lock; /* protects fragment_list */
+
+ /** @lock: lock to protect the list of fragments */
+ spinlock_t lock;
+
+ /** @timestamp: time (jiffie) of last received fragment */
unsigned long timestamp;
+
+ /** @seqno: sequence number of the fragments in the list */
u16 seqno;
+
+ /** @size: accumulated size of packets in list */
u16 size;
+
+ /** @total_size: expected size of the assembled packet */
u16 total_size;
};
/**
* struct batadv_frag_list_entry - entry in a list of fragments
- * @list: list node information
- * @skb: fragment
- * @no: fragment number in the set
*/
struct batadv_frag_list_entry {
+ /** @list: list node information */
struct hlist_node list;
+
+ /** @skb: fragment */
struct sk_buff *skb;
+
+ /** @no: fragment number in the set */
u8 no;
};
/**
* struct batadv_vlan_tt - VLAN specific TT attributes
- * @crc: CRC32 checksum of the entries belonging to this vlan
- * @num_entries: number of TT entries for this VLAN
*/
struct batadv_vlan_tt {
+ /** @crc: CRC32 checksum of the entries belonging to this vlan */
u32 crc;
+
+ /** @num_entries: number of TT entries for this VLAN */
atomic_t num_entries;
};
/**
* struct batadv_orig_node_vlan - VLAN specific data per orig_node
- * @vid: the VLAN identifier
- * @tt: VLAN specific TT attributes
- * @list: list node for orig_node::vlan_list
- * @refcount: number of context where this object is currently in use
- * @rcu: struct used for freeing in a RCU-safe manner
*/
struct batadv_orig_node_vlan {
+ /** @vid: the VLAN identifier */
unsigned short vid;
+
+ /** @tt: VLAN specific TT attributes */
struct batadv_vlan_tt tt;
+
+ /** @list: list node for &batadv_orig_node.vlan_list */
struct hlist_node list;
+
+ /**
+ * @refcount: number of context where this object is currently in use
+ */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in a RCU-safe manner */
struct rcu_head rcu;
};
/**
* struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members
- * @bcast_own: set of bitfields (one per hard-interface) where each one counts
- * the number of our OGMs this orig_node rebroadcasted "back" to us (relative
- * to last_real_seqno). Every bitfield is BATADV_TQ_LOCAL_WINDOW_SIZE bits long.
- * @bcast_own_sum: sum of bcast_own
- * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum,
- * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count
*/
struct batadv_orig_bat_iv {
+ /**
+ * @bcast_own: set of bitfields (one per hard-interface) where each one
+ * counts the number of our OGMs this orig_node rebroadcasted "back" to
+ * us (relative to last_real_seqno). Every bitfield is
+ * BATADV_TQ_LOCAL_WINDOW_SIZE bits long.
+ */
unsigned long *bcast_own;
+
+ /** @bcast_own_sum: sum of bcast_own */
u8 *bcast_own_sum;
- /* ogm_cnt_lock protects: bcast_own, bcast_own_sum,
+
+ /**
+ * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum,
* neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count
*/
spinlock_t ogm_cnt_lock;
@@ -275,130 +355,205 @@ struct batadv_orig_bat_iv {
/**
* struct batadv_orig_node - structure for orig_list maintaining nodes of mesh
- * @orig: originator ethernet address
- * @ifinfo_list: list for routers per outgoing interface
- * @last_bonding_candidate: pointer to last ifinfo of last used router
- * @dat_addr: address of the orig node in the distributed hash
- * @last_seen: time when last packet from this node was received
- * @bcast_seqno_reset: time when the broadcast seqno window was reset
- * @mcast_handler_lock: synchronizes mcast-capability and -flag changes
- * @mcast_flags: multicast flags announced by the orig node
- * @mcast_want_all_unsnoopables_node: a list node for the
- * mcast.want_all_unsnoopables list
- * @mcast_want_all_ipv4_node: a list node for the mcast.want_all_ipv4 list
- * @mcast_want_all_ipv6_node: a list node for the mcast.want_all_ipv6 list
- * @capabilities: announced capabilities of this originator
- * @capa_initialized: bitfield to remember whether a capability was initialized
- * @last_ttvn: last seen translation table version number
- * @tt_buff: last tt changeset this node received from the orig node
- * @tt_buff_len: length of the last tt changeset this node received from the
- * orig node
- * @tt_buff_lock: lock that protects tt_buff and tt_buff_len
- * @tt_lock: prevents from updating the table while reading it. Table update is
- * made up by two operations (data structure update and metdata -CRC/TTVN-
- * recalculation) and they have to be executed atomically in order to avoid
- * another thread to read the table/metadata between those.
- * @bcast_bits: bitfield containing the info which payload broadcast originated
- * from this orig node this host already has seen (relative to
- * last_bcast_seqno)
- * @last_bcast_seqno: last broadcast sequence number received by this host
- * @neigh_list: list of potential next hop neighbor towards this orig node
- * @neigh_list_lock: lock protecting neigh_list and router
- * @hash_entry: hlist node for batadv_priv::orig_hash
- * @bat_priv: pointer to soft_iface this orig node belongs to
- * @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in an RCU-safe manner
- * @in_coding_list: list of nodes this orig can hear
- * @out_coding_list: list of nodes that can hear this orig
- * @in_coding_list_lock: protects in_coding_list
- * @out_coding_list_lock: protects out_coding_list
- * @fragments: array with heads for fragment chains
- * @vlan_list: a list of orig_node_vlan structs, one per VLAN served by the
- * originator represented by this object
- * @vlan_list_lock: lock protecting vlan_list
- * @bat_iv: B.A.T.M.A.N. IV private structure
*/
struct batadv_orig_node {
+ /** @orig: originator ethernet address */
u8 orig[ETH_ALEN];
+
+ /** @ifinfo_list: list for routers per outgoing interface */
struct hlist_head ifinfo_list;
+
+ /**
+ * @last_bonding_candidate: pointer to last ifinfo of last used router
+ */
struct batadv_orig_ifinfo *last_bonding_candidate;
+
#ifdef CONFIG_BATMAN_ADV_DAT
+ /** @dat_addr: address of the orig node in the distributed hash */
batadv_dat_addr_t dat_addr;
#endif
+
+ /** @last_seen: time when last packet from this node was received */
unsigned long last_seen;
+
+ /**
+ * @bcast_seqno_reset: time when the broadcast seqno window was reset
+ */
unsigned long bcast_seqno_reset;
+
#ifdef CONFIG_BATMAN_ADV_MCAST
- /* synchronizes mcast tvlv specific orig changes */
+ /**
+ * @mcast_handler_lock: synchronizes mcast-capability and -flag changes
+ */
spinlock_t mcast_handler_lock;
+
+ /** @mcast_flags: multicast flags announced by the orig node */
u8 mcast_flags;
+
+ /**
+ * @mcast_want_all_unsnoopables_node: a list node for the
+ * mcast.want_all_unsnoopables list
+ */
struct hlist_node mcast_want_all_unsnoopables_node;
+
+ /**
+ * @mcast_want_all_ipv4_node: a list node for the mcast.want_all_ipv4
+ * list
+ */
struct hlist_node mcast_want_all_ipv4_node;
+ /**
+ * @mcast_want_all_ipv6_node: a list node for the mcast.want_all_ipv6
+ * list
+ */
struct hlist_node mcast_want_all_ipv6_node;
#endif
+
+ /** @capabilities: announced capabilities of this originator */
unsigned long capabilities;
+
+ /**
+ * @capa_initialized: bitfield to remember whether a capability was
+ * initialized
+ */
unsigned long capa_initialized;
+
+ /** @last_ttvn: last seen translation table version number */
atomic_t last_ttvn;
+
+ /** @tt_buff: last tt changeset this node received from the orig node */
unsigned char *tt_buff;
+
+ /**
+ * @tt_buff_len: length of the last tt changeset this node received
+ * from the orig node
+ */
s16 tt_buff_len;
- spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */
- /* prevents from changing the table while reading it */
+
+ /** @tt_buff_lock: lock that protects tt_buff and tt_buff_len */
+ spinlock_t tt_buff_lock;
+
+ /**
+ * @tt_lock: prevents from updating the table while reading it. Table
+ * update is made up by two operations (data structure update and
+ * metdata -CRC/TTVN-recalculation) and they have to be executed
+ * atomically in order to avoid another thread to read the
+ * table/metadata between those.
+ */
spinlock_t tt_lock;
+
+ /**
+ * @bcast_bits: bitfield containing the info which payload broadcast
+ * originated from this orig node this host already has seen (relative
+ * to last_bcast_seqno)
+ */
DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
+
+ /**
+ * @last_bcast_seqno: last broadcast sequence number received by this
+ * host
+ */
u32 last_bcast_seqno;
+
+ /**
+ * @neigh_list: list of potential next hop neighbor towards this orig
+ * node
+ */
struct hlist_head neigh_list;
- /* neigh_list_lock protects: neigh_list, ifinfo_list,
- * last_bonding_candidate and router
+
+ /**
+ * @neigh_list_lock: lock protecting neigh_list, ifinfo_list,
+ * last_bonding_candidate and router
*/
spinlock_t neigh_list_lock;
+
+ /** @hash_entry: hlist node for &batadv_priv.orig_hash */
struct hlist_node hash_entry;
+
+ /** @bat_priv: pointer to soft_iface this orig node belongs to */
struct batadv_priv *bat_priv;
- /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */
+
+ /** @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno */
spinlock_t bcast_seqno_lock;
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
+
#ifdef CONFIG_BATMAN_ADV_NC
+ /** @in_coding_list: list of nodes this orig can hear */
struct list_head in_coding_list;
+
+ /** @out_coding_list: list of nodes that can hear this orig */
struct list_head out_coding_list;
- spinlock_t in_coding_list_lock; /* Protects in_coding_list */
- spinlock_t out_coding_list_lock; /* Protects out_coding_list */
+
+ /** @in_coding_list_lock: protects in_coding_list */
+ spinlock_t in_coding_list_lock;
+
+ /** @out_coding_list_lock: protects out_coding_list */
+ spinlock_t out_coding_list_lock;
#endif
+
+ /** @fragments: array with heads for fragment chains */
struct batadv_frag_table_entry fragments[BATADV_FRAG_BUFFER_COUNT];
+
+ /**
+ * @vlan_list: a list of orig_node_vlan structs, one per VLAN served by
+ * the originator represented by this object
+ */
struct hlist_head vlan_list;
- spinlock_t vlan_list_lock; /* protects vlan_list */
+
+ /** @vlan_list_lock: lock protecting vlan_list */
+ spinlock_t vlan_list_lock;
+
+ /** @bat_iv: B.A.T.M.A.N. IV private structure */
struct batadv_orig_bat_iv bat_iv;
};
/**
* enum batadv_orig_capabilities - orig node capabilities
- * @BATADV_ORIG_CAPA_HAS_DAT: orig node has distributed arp table enabled
- * @BATADV_ORIG_CAPA_HAS_NC: orig node has network coding enabled
- * @BATADV_ORIG_CAPA_HAS_TT: orig node has tt capability
- * @BATADV_ORIG_CAPA_HAS_MCAST: orig node has some multicast capability
- * (= orig node announces a tvlv of type BATADV_TVLV_MCAST)
*/
enum batadv_orig_capabilities {
+ /**
+ * @BATADV_ORIG_CAPA_HAS_DAT: orig node has distributed arp table
+ * enabled
+ */
BATADV_ORIG_CAPA_HAS_DAT,
+
+ /** @BATADV_ORIG_CAPA_HAS_NC: orig node has network coding enabled */
BATADV_ORIG_CAPA_HAS_NC,
+
+ /** @BATADV_ORIG_CAPA_HAS_TT: orig node has tt capability */
BATADV_ORIG_CAPA_HAS_TT,
+
+ /**
+ * @BATADV_ORIG_CAPA_HAS_MCAST: orig node has some multicast capability
+ * (= orig node announces a tvlv of type BATADV_TVLV_MCAST)
+ */
BATADV_ORIG_CAPA_HAS_MCAST,
};
/**
* struct batadv_gw_node - structure for orig nodes announcing gw capabilities
- * @list: list node for batadv_priv_gw::list
- * @orig_node: pointer to corresponding orig node
- * @bandwidth_down: advertised uplink download bandwidth
- * @bandwidth_up: advertised uplink upload bandwidth
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_gw_node {
+ /** @list: list node for &batadv_priv_gw.list */
struct hlist_node list;
+
+ /** @orig_node: pointer to corresponding orig node */
struct batadv_orig_node *orig_node;
+
+ /** @bandwidth_down: advertised uplink download bandwidth */
u32 bandwidth_down;
+
+ /** @bandwidth_up: advertised uplink upload bandwidth */
u32 bandwidth_up;
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
};
@@ -407,118 +562,161 @@ DECLARE_EWMA(throughput, 10, 8)
/**
* struct batadv_hardif_neigh_node_bat_v - B.A.T.M.A.N. V private neighbor
* information
- * @throughput: ewma link throughput towards this neighbor
- * @elp_interval: time interval between two ELP transmissions
- * @elp_latest_seqno: latest and best known ELP sequence number
- * @last_unicast_tx: when the last unicast packet has been sent to this neighbor
- * @metric_work: work queue callback item for metric update
*/
struct batadv_hardif_neigh_node_bat_v {
+ /** @throughput: ewma link throughput towards this neighbor */
struct ewma_throughput throughput;
+
+ /** @elp_interval: time interval between two ELP transmissions */
u32 elp_interval;
+
+ /** @elp_latest_seqno: latest and best known ELP sequence number */
u32 elp_latest_seqno;
+
+ /**
+ * @last_unicast_tx: when the last unicast packet has been sent to this
+ * neighbor
+ */
unsigned long last_unicast_tx;
+
+ /** @metric_work: work queue callback item for metric update */
struct work_struct metric_work;
};
/**
* struct batadv_hardif_neigh_node - unique neighbor per hard-interface
- * @list: list node for batadv_hard_iface::neigh_list
- * @addr: the MAC address of the neighboring interface
- * @orig: the address of the originator this neighbor node belongs to
- * @if_incoming: pointer to incoming hard-interface
- * @last_seen: when last packet via this neighbor was received
- * @bat_v: B.A.T.M.A.N. V private data
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in a RCU-safe manner
*/
struct batadv_hardif_neigh_node {
+ /** @list: list node for &batadv_hard_iface.neigh_list */
struct hlist_node list;
+
+ /** @addr: the MAC address of the neighboring interface */
u8 addr[ETH_ALEN];
+
+ /**
+ * @orig: the address of the originator this neighbor node belongs to
+ */
u8 orig[ETH_ALEN];
+
+ /** @if_incoming: pointer to incoming hard-interface */
struct batadv_hard_iface *if_incoming;
+
+ /** @last_seen: when last packet via this neighbor was received */
unsigned long last_seen;
+
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+ /** @bat_v: B.A.T.M.A.N. V private data */
struct batadv_hardif_neigh_node_bat_v bat_v;
#endif
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in a RCU-safe manner */
struct rcu_head rcu;
};
/**
* struct batadv_neigh_node - structure for single hops neighbors
- * @list: list node for batadv_orig_node::neigh_list
- * @orig_node: pointer to corresponding orig_node
- * @addr: the MAC address of the neighboring interface
- * @ifinfo_list: list for routing metrics per outgoing interface
- * @ifinfo_lock: lock protecting private ifinfo members and list
- * @if_incoming: pointer to incoming hard-interface
- * @last_seen: when last packet via this neighbor was received
- * @hardif_neigh: hardif_neigh of this neighbor
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_neigh_node {
+ /** @list: list node for &batadv_orig_node.neigh_list */
struct hlist_node list;
+
+ /** @orig_node: pointer to corresponding orig_node */
struct batadv_orig_node *orig_node;
+
+ /** @addr: the MAC address of the neighboring interface */
u8 addr[ETH_ALEN];
+
+ /** @ifinfo_list: list for routing metrics per outgoing interface */
struct hlist_head ifinfo_list;
- spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */
+
+ /** @ifinfo_lock: lock protecting ifinfo_list and its members */
+ spinlock_t ifinfo_lock;
+
+ /** @if_incoming: pointer to incoming hard-interface */
struct batadv_hard_iface *if_incoming;
+
+ /** @last_seen: when last packet via this neighbor was received */
unsigned long last_seen;
+
+ /** @hardif_neigh: hardif_neigh of this neighbor */
struct batadv_hardif_neigh_node *hardif_neigh;
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
};
/**
* struct batadv_neigh_ifinfo_bat_iv - neighbor information per outgoing
* interface for B.A.T.M.A.N. IV
- * @tq_recv: ring buffer of received TQ values from this neigh node
- * @tq_index: ring buffer index
- * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv)
- * @real_bits: bitfield containing the number of OGMs received from this neigh
- * node (relative to orig_node->last_real_seqno)
- * @real_packet_count: counted result of real_bits
*/
struct batadv_neigh_ifinfo_bat_iv {
+ /** @tq_recv: ring buffer of received TQ values from this neigh node */
u8 tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE];
+
+ /** @tq_index: ring buffer index */
u8 tq_index;
+
+ /**
+ * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv)
+ */
u8 tq_avg;
+
+ /**
+ * @real_bits: bitfield containing the number of OGMs received from this
+ * neigh node (relative to orig_node->last_real_seqno)
+ */
DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
+
+ /** @real_packet_count: counted result of real_bits */
u8 real_packet_count;
};
/**
* struct batadv_neigh_ifinfo_bat_v - neighbor information per outgoing
* interface for B.A.T.M.A.N. V
- * @throughput: last throughput metric received from originator via this neigh
- * @last_seqno: last sequence number known for this neighbor
*/
struct batadv_neigh_ifinfo_bat_v {
+ /**
+ * @throughput: last throughput metric received from originator via this
+ * neigh
+ */
u32 throughput;
+
+ /** @last_seqno: last sequence number known for this neighbor */
u32 last_seqno;
};
/**
* struct batadv_neigh_ifinfo - neighbor information per outgoing interface
- * @list: list node for batadv_neigh_node::ifinfo_list
- * @if_outgoing: pointer to outgoing hard-interface
- * @bat_iv: B.A.T.M.A.N. IV private structure
- * @bat_v: B.A.T.M.A.N. V private data
- * @last_ttl: last received ttl from this neigh node
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in a RCU-safe manner
*/
struct batadv_neigh_ifinfo {
+ /** @list: list node for &batadv_neigh_node.ifinfo_list */
struct hlist_node list;
+
+ /** @if_outgoing: pointer to outgoing hard-interface */
struct batadv_hard_iface *if_outgoing;
+
+ /** @bat_iv: B.A.T.M.A.N. IV private structure */
struct batadv_neigh_ifinfo_bat_iv bat_iv;
+
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+ /** @bat_v: B.A.T.M.A.N. V private data */
struct batadv_neigh_ifinfo_bat_v bat_v;
#endif
+
+ /** @last_ttl: last received ttl from this neigh node */
u8 last_ttl;
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in a RCU-safe manner */
struct rcu_head rcu;
};
@@ -526,148 +724,278 @@ struct batadv_neigh_ifinfo {
/**
* struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression
- * @orig: mac address of orig node orginating the broadcast
- * @crc: crc32 checksum of broadcast payload
- * @entrytime: time when the broadcast packet was received
*/
struct batadv_bcast_duplist_entry {
+ /** @orig: mac address of orig node orginating the broadcast */
u8 orig[ETH_ALEN];
+
+ /** @crc: crc32 checksum of broadcast payload */
__be32 crc;
+
+ /** @entrytime: time when the broadcast packet was received */
unsigned long entrytime;
};
#endif
/**
* enum batadv_counters - indices for traffic counters
- * @BATADV_CNT_TX: transmitted payload traffic packet counter
- * @BATADV_CNT_TX_BYTES: transmitted payload traffic bytes counter
- * @BATADV_CNT_TX_DROPPED: dropped transmission payload traffic packet counter
- * @BATADV_CNT_RX: received payload traffic packet counter
- * @BATADV_CNT_RX_BYTES: received payload traffic bytes counter
- * @BATADV_CNT_FORWARD: forwarded payload traffic packet counter
- * @BATADV_CNT_FORWARD_BYTES: forwarded payload traffic bytes counter
- * @BATADV_CNT_MGMT_TX: transmitted routing protocol traffic packet counter
- * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes counter
- * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter
- * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes counter
- * @BATADV_CNT_FRAG_TX: transmitted fragment traffic packet counter
- * @BATADV_CNT_FRAG_TX_BYTES: transmitted fragment traffic bytes counter
- * @BATADV_CNT_FRAG_RX: received fragment traffic packet counter
- * @BATADV_CNT_FRAG_RX_BYTES: received fragment traffic bytes counter
- * @BATADV_CNT_FRAG_FWD: forwarded fragment traffic packet counter
- * @BATADV_CNT_FRAG_FWD_BYTES: forwarded fragment traffic bytes counter
- * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter
- * @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter
- * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet counter
- * @BATADV_CNT_TT_RESPONSE_RX: received tt resp traffic packet counter
- * @BATADV_CNT_TT_ROAM_ADV_TX: transmitted tt roam traffic packet counter
- * @BATADV_CNT_TT_ROAM_ADV_RX: received tt roam traffic packet counter
- * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter
- * @BATADV_CNT_DAT_GET_RX: received dht GET traffic packet counter
- * @BATADV_CNT_DAT_PUT_TX: transmitted dht PUT traffic packet counter
- * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter
- * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet
- * counter
- * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter
- * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes counter
- * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet counter
- * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes counter
- * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc decoding
- * @BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter
- * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes counter
- * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic packet
- * counter
- * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in promisc
- * mode.
- * @BATADV_CNT_NUM: number of traffic counters
*/
enum batadv_counters {
+ /** @BATADV_CNT_TX: transmitted payload traffic packet counter */
BATADV_CNT_TX,
+
+ /** @BATADV_CNT_TX_BYTES: transmitted payload traffic bytes counter */
BATADV_CNT_TX_BYTES,
+
+ /**
+ * @BATADV_CNT_TX_DROPPED: dropped transmission payload traffic packet
+ * counter
+ */
BATADV_CNT_TX_DROPPED,
+
+ /** @BATADV_CNT_RX: received payload traffic packet counter */
BATADV_CNT_RX,
+
+ /** @BATADV_CNT_RX_BYTES: received payload traffic bytes counter */
BATADV_CNT_RX_BYTES,
+
+ /** @BATADV_CNT_FORWARD: forwarded payload traffic packet counter */
BATADV_CNT_FORWARD,
+
+ /**
+ * @BATADV_CNT_FORWARD_BYTES: forwarded payload traffic bytes counter
+ */
BATADV_CNT_FORWARD_BYTES,
+
+ /**
+ * @BATADV_CNT_MGMT_TX: transmitted routing protocol traffic packet
+ * counter
+ */
BATADV_CNT_MGMT_TX,
+
+ /**
+ * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes
+ * counter
+ */
BATADV_CNT_MGMT_TX_BYTES,
+
+ /**
+ * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter
+ */
BATADV_CNT_MGMT_RX,
+
+ /**
+ * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes
+ * counter
+ */
BATADV_CNT_MGMT_RX_BYTES,
+
+ /** @BATADV_CNT_FRAG_TX: transmitted fragment traffic packet counter */
BATADV_CNT_FRAG_TX,
+
+ /**
+ * @BATADV_CNT_FRAG_TX_BYTES: transmitted fragment traffic bytes counter
+ */
BATADV_CNT_FRAG_TX_BYTES,
+
+ /** @BATADV_CNT_FRAG_RX: received fragment traffic packet counter */
BATADV_CNT_FRAG_RX,
+
+ /**
+ * @BATADV_CNT_FRAG_RX_BYTES: received fragment traffic bytes counter
+ */
BATADV_CNT_FRAG_RX_BYTES,
+
+ /** @BATADV_CNT_FRAG_FWD: forwarded fragment traffic packet counter */
BATADV_CNT_FRAG_FWD,
+
+ /**
+ * @BATADV_CNT_FRAG_FWD_BYTES: forwarded fragment traffic bytes counter
+ */
BATADV_CNT_FRAG_FWD_BYTES,
+
+ /**
+ * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter
+ */
BATADV_CNT_TT_REQUEST_TX,
+
+ /** @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter */
BATADV_CNT_TT_REQUEST_RX,
+
+ /**
+ * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet
+ * counter
+ */
BATADV_CNT_TT_RESPONSE_TX,
+
+ /**
+ * @BATADV_CNT_TT_RESPONSE_RX: received tt resp traffic packet counter
+ */
BATADV_CNT_TT_RESPONSE_RX,
+
+ /**
+ * @BATADV_CNT_TT_ROAM_ADV_TX: transmitted tt roam traffic packet
+ * counter
+ */
BATADV_CNT_TT_ROAM_ADV_TX,
+
+ /**
+ * @BATADV_CNT_TT_ROAM_ADV_RX: received tt roam traffic packet counter
+ */
BATADV_CNT_TT_ROAM_ADV_RX,
+
#ifdef CONFIG_BATMAN_ADV_DAT
+ /**
+ * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter
+ */
BATADV_CNT_DAT_GET_TX,
+
+ /** @BATADV_CNT_DAT_GET_RX: received dht GET traffic packet counter */
BATADV_CNT_DAT_GET_RX,
+
+ /**
+ * @BATADV_CNT_DAT_PUT_TX: transmitted dht PUT traffic packet counter
+ */
BATADV_CNT_DAT_PUT_TX,
+
+ /** @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter */
BATADV_CNT_DAT_PUT_RX,
+
+ /**
+ * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic
+ * packet counter
+ */
BATADV_CNT_DAT_CACHED_REPLY_TX,
#endif
+
#ifdef CONFIG_BATMAN_ADV_NC
+ /**
+ * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter
+ */
BATADV_CNT_NC_CODE,
+
+ /**
+ * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes
+ * counter
+ */
BATADV_CNT_NC_CODE_BYTES,
+
+ /**
+ * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet
+ * counter
+ */
BATADV_CNT_NC_RECODE,
+
+ /**
+ * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes
+ * counter
+ */
BATADV_CNT_NC_RECODE_BYTES,
+
+ /**
+ * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc
+ * decoding
+ */
BATADV_CNT_NC_BUFFER,
+
+ /**
+ * @BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter
+ */
BATADV_CNT_NC_DECODE,
+
+ /**
+ * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes
+ * counter
+ */
BATADV_CNT_NC_DECODE_BYTES,
+
+ /**
+ * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic
+ * packet counter
+ */
BATADV_CNT_NC_DECODE_FAILED,
+
+ /**
+ * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in
+ * promisc mode.
+ */
BATADV_CNT_NC_SNIFFED,
#endif
+
+ /** @BATADV_CNT_NUM: number of traffic counters */
BATADV_CNT_NUM,
};
/**
* struct batadv_priv_tt - per mesh interface translation table data
- * @vn: translation table version number
- * @ogm_append_cnt: counter of number of OGMs containing the local tt diff
- * @local_changes: changes registered in an originator interval
- * @changes_list: tracks tt local changes within an originator interval
- * @local_hash: local translation table hash table
- * @global_hash: global translation table hash table
- * @req_list: list of pending & unanswered tt_requests
- * @roam_list: list of the last roaming events of each client limiting the
- * number of roaming events to avoid route flapping
- * @changes_list_lock: lock protecting changes_list
- * @req_list_lock: lock protecting req_list
- * @roam_list_lock: lock protecting roam_list
- * @last_changeset: last tt changeset this host has generated
- * @last_changeset_len: length of last tt changeset this host has generated
- * @last_changeset_lock: lock protecting last_changeset & last_changeset_len
- * @commit_lock: prevents from executing a local TT commit while reading the
- * local table. The local TT commit is made up by two operations (data
- * structure update and metdata -CRC/TTVN- recalculation) and they have to be
- * executed atomically in order to avoid another thread to read the
- * table/metadata between those.
- * @work: work queue callback item for translation table purging
*/
struct batadv_priv_tt {
+ /** @vn: translation table version number */
atomic_t vn;
+
+ /**
+ * @ogm_append_cnt: counter of number of OGMs containing the local tt
+ * diff
+ */
atomic_t ogm_append_cnt;
+
+ /** @local_changes: changes registered in an originator interval */
atomic_t local_changes;
+
+ /**
+ * @changes_list: tracks tt local changes within an originator interval
+ */
struct list_head changes_list;
+
+ /** @local_hash: local translation table hash table */
struct batadv_hashtable *local_hash;
+
+ /** @global_hash: global translation table hash table */
struct batadv_hashtable *global_hash;
+
+ /** @req_list: list of pending & unanswered tt_requests */
struct hlist_head req_list;
+
+ /**
+ * @roam_list: list of the last roaming events of each client limiting
+ * the number of roaming events to avoid route flapping
+ */
struct list_head roam_list;
- spinlock_t changes_list_lock; /* protects changes */
- spinlock_t req_list_lock; /* protects req_list */
- spinlock_t roam_list_lock; /* protects roam_list */
+
+ /** @changes_list_lock: lock protecting changes_list */
+ spinlock_t changes_list_lock;
+
+ /** @req_list_lock: lock protecting req_list */
+ spinlock_t req_list_lock;
+
+ /** @roam_list_lock: lock protecting roam_list */
+ spinlock_t roam_list_lock;
+
+ /** @last_changeset: last tt changeset this host has generated */
unsigned char *last_changeset;
+
+ /**
+ * @last_changeset_len: length of last tt changeset this host has
+ * generated
+ */
s16 last_changeset_len;
- /* protects last_changeset & last_changeset_len */
+
+ /**
+ * @last_changeset_lock: lock protecting last_changeset &
+ * last_changeset_len
+ */
spinlock_t last_changeset_lock;
- /* prevents from executing a commit while reading the table */
+
+ /**
+ * @commit_lock: prevents from executing a local TT commit while reading
+ * the local table. The local TT commit is made up by two operations
+ * (data structure update and metdata -CRC/TTVN- recalculation) and
+ * they have to be executed atomically in order to avoid another thread
+ * to read the table/metadata between those.
+ */
spinlock_t commit_lock;
+
+ /** @work: work queue callback item for translation table purging */
struct delayed_work work;
};
@@ -675,31 +1003,57 @@ struct batadv_priv_tt {
/**
* struct batadv_priv_bla - per mesh interface bridge loope avoidance data
- * @num_requests: number of bla requests in flight
- * @claim_hash: hash table containing mesh nodes this host has claimed
- * @backbone_hash: hash table containing all detected backbone gateways
- * @loopdetect_addr: MAC address used for own loopdetection frames
- * @loopdetect_lasttime: time when the loopdetection frames were sent
- * @loopdetect_next: how many periods to wait for the next loopdetect process
- * @bcast_duplist: recently received broadcast packets array (for broadcast
- * duplicate suppression)
- * @bcast_duplist_curr: index of last broadcast packet added to bcast_duplist
- * @bcast_duplist_lock: lock protecting bcast_duplist & bcast_duplist_curr
- * @claim_dest: local claim data (e.g. claim group)
- * @work: work queue callback item for cleanups & bla announcements
*/
struct batadv_priv_bla {
+ /** @num_requests: number of bla requests in flight */
atomic_t num_requests;
+
+ /**
+ * @claim_hash: hash table containing mesh nodes this host has claimed
+ */
struct batadv_hashtable *claim_hash;
+
+ /**
+ * @backbone_hash: hash table containing all detected backbone gateways
+ */
struct batadv_hashtable *backbone_hash;
+
+ /** @loopdetect_addr: MAC address used for own loopdetection frames */
u8 loopdetect_addr[ETH_ALEN];
+
+ /**
+ * @loopdetect_lasttime: time when the loopdetection frames were sent
+ */
unsigned long loopdetect_lasttime;
+
+ /**
+ * @loopdetect_next: how many periods to wait for the next loopdetect
+ * process
+ */
atomic_t loopdetect_next;
+
+ /**
+ * @bcast_duplist: recently received broadcast packets array (for
+ * broadcast duplicate suppression)
+ */
struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE];
+
+ /**
+ * @bcast_duplist_curr: index of last broadcast packet added to
+ * bcast_duplist
+ */
int bcast_duplist_curr;
- /* protects bcast_duplist & bcast_duplist_curr */
+
+ /**
+ * @bcast_duplist_lock: lock protecting bcast_duplist &
+ * bcast_duplist_curr
+ */
spinlock_t bcast_duplist_lock;
+
+ /** @claim_dest: local claim data (e.g. claim group) */
struct batadv_bla_claim_dst claim_dest;
+
+ /** @work: work queue callback item for cleanups & bla announcements */
struct delayed_work work;
};
#endif
@@ -708,68 +1062,94 @@ struct batadv_priv_bla {
/**
* struct batadv_priv_debug_log - debug logging data
- * @log_buff: buffer holding the logs (ring bufer)
- * @log_start: index of next character to read
- * @log_end: index of next character to write
- * @lock: lock protecting log_buff, log_start & log_end
- * @queue_wait: log reader's wait queue
*/
struct batadv_priv_debug_log {
+ /** @log_buff: buffer holding the logs (ring bufer) */
char log_buff[BATADV_LOG_BUF_LEN];
+
+ /** @log_start: index of next character to read */
unsigned long log_start;
+
+ /** @log_end: index of next character to write */
unsigned long log_end;
- spinlock_t lock; /* protects log_buff, log_start and log_end */
+
+ /** @lock: lock protecting log_buff, log_start & log_end */
+ spinlock_t lock;
+
+ /** @queue_wait: log reader's wait queue */
wait_queue_head_t queue_wait;
};
#endif
/**
* struct batadv_priv_gw - per mesh interface gateway data
- * @gateway_list: list of available gateway nodes
- * @list_lock: lock protecting gateway_list & curr_gw
- * @curr_gw: pointer to currently selected gateway node
- * @mode: gateway operation: off, client or server (see batadv_gw_modes)
- * @sel_class: gateway selection class (applies if gw_mode client)
- * @bandwidth_down: advertised uplink download bandwidth (if gw_mode server)
- * @bandwidth_up: advertised uplink upload bandwidth (if gw_mode server)
- * @reselect: bool indicating a gateway re-selection is in progress
*/
struct batadv_priv_gw {
+ /** @gateway_list: list of available gateway nodes */
struct hlist_head gateway_list;
- spinlock_t list_lock; /* protects gateway_list & curr_gw */
- struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */
+
+ /** @list_lock: lock protecting gateway_list & curr_gw */
+ spinlock_t list_lock;
+
+ /** @curr_gw: pointer to currently selected gateway node */
+ struct batadv_gw_node __rcu *curr_gw;
+
+ /**
+ * @mode: gateway operation: off, client or server (see batadv_gw_modes)
+ */
atomic_t mode;
+
+ /** @sel_class: gateway selection class (applies if gw_mode client) */
atomic_t sel_class;
+
+ /**
+ * @bandwidth_down: advertised uplink download bandwidth (if gw_mode
+ * server)
+ */
atomic_t bandwidth_down;
+
+ /**
+ * @bandwidth_up: advertised uplink upload bandwidth (if gw_mode server)
+ */
atomic_t bandwidth_up;
+
+ /** @reselect: bool indicating a gateway re-selection is in progress */
atomic_t reselect;
};
/**
* struct batadv_priv_tvlv - per mesh interface tvlv data
- * @container_list: list of registered tvlv containers to be sent with each OGM
- * @handler_list: list of the various tvlv content handlers
- * @container_list_lock: protects tvlv container list access
- * @handler_list_lock: protects handler list access
*/
struct batadv_priv_tvlv {
+ /**
+ * @container_list: list of registered tvlv containers to be sent with
+ * each OGM
+ */
struct hlist_head container_list;
+
+ /** @handler_list: list of the various tvlv content handlers */
struct hlist_head handler_list;
- spinlock_t container_list_lock; /* protects container_list */
- spinlock_t handler_list_lock; /* protects handler_list */
+
+ /** @container_list_lock: protects tvlv container list access */
+ spinlock_t container_list_lock;
+
+ /** @handler_list_lock: protects handler list access */
+ spinlock_t handler_list_lock;
};
#ifdef CONFIG_BATMAN_ADV_DAT
/**
* struct batadv_priv_dat - per mesh interface DAT private data
- * @addr: node DAT address
- * @hash: hashtable representing the local ARP cache
- * @work: work queue callback item for cache purging
*/
struct batadv_priv_dat {
+ /** @addr: node DAT address */
batadv_dat_addr_t addr;
+
+ /** @hash: hashtable representing the local ARP cache */
struct batadv_hashtable *hash;
+
+ /** @work: work queue callback item for cache purging */
struct delayed_work work;
};
#endif
@@ -777,375 +1157,582 @@ struct batadv_priv_dat {
#ifdef CONFIG_BATMAN_ADV_MCAST
/**
* struct batadv_mcast_querier_state - IGMP/MLD querier state when bridged
- * @exists: whether a querier exists in the mesh
- * @shadowing: if a querier exists, whether it is potentially shadowing
- * multicast listeners (i.e. querier is behind our own bridge segment)
*/
struct batadv_mcast_querier_state {
+ /** @exists: whether a querier exists in the mesh */
bool exists;
+
+ /**
+ * @shadowing: if a querier exists, whether it is potentially shadowing
+ * multicast listeners (i.e. querier is behind our own bridge segment)
+ */
bool shadowing;
};
/**
* struct batadv_priv_mcast - per mesh interface mcast data
- * @mla_list: list of multicast addresses we are currently announcing via TT
- * @want_all_unsnoopables_list: a list of orig_nodes wanting all unsnoopable
- * multicast traffic
- * @want_all_ipv4_list: a list of orig_nodes wanting all IPv4 multicast traffic
- * @want_all_ipv6_list: a list of orig_nodes wanting all IPv6 multicast traffic
- * @querier_ipv4: the current state of an IGMP querier in the mesh
- * @querier_ipv6: the current state of an MLD querier in the mesh
- * @flags: the flags we have last sent in our mcast tvlv
- * @enabled: whether the multicast tvlv is currently enabled
- * @bridged: whether the soft interface has a bridge on top
- * @num_disabled: number of nodes that have no mcast tvlv
- * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP traffic
- * @num_want_all_ipv4: counter for items in want_all_ipv4_list
- * @num_want_all_ipv6: counter for items in want_all_ipv6_list
- * @want_lists_lock: lock for protecting modifications to mcast want lists
- * (traversals are rcu-locked)
- * @work: work queue callback item for multicast TT and TVLV updates
*/
struct batadv_priv_mcast {
+ /**
+ * @mla_list: list of multicast addresses we are currently announcing
+ * via TT
+ */
struct hlist_head mla_list; /* see __batadv_mcast_mla_update() */
+
+ /**
+ * @want_all_unsnoopables_list: a list of orig_nodes wanting all
+ * unsnoopable multicast traffic
+ */
struct hlist_head want_all_unsnoopables_list;
+
+ /**
+ * @want_all_ipv4_list: a list of orig_nodes wanting all IPv4 multicast
+ * traffic
+ */
struct hlist_head want_all_ipv4_list;
+
+ /**
+ * @want_all_ipv6_list: a list of orig_nodes wanting all IPv6 multicast
+ * traffic
+ */
struct hlist_head want_all_ipv6_list;
+
+ /** @querier_ipv4: the current state of an IGMP querier in the mesh */
struct batadv_mcast_querier_state querier_ipv4;
+
+ /** @querier_ipv6: the current state of an MLD querier in the mesh */
struct batadv_mcast_querier_state querier_ipv6;
+
+ /** @flags: the flags we have last sent in our mcast tvlv */
u8 flags;
+
+ /** @enabled: whether the multicast tvlv is currently enabled */
bool enabled;
+
+ /** @bridged: whether the soft interface has a bridge on top */
bool bridged;
+
+ /** @num_disabled: number of nodes that have no mcast tvlv */
atomic_t num_disabled;
+
+ /**
+ * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP
+ * traffic
+ */
atomic_t num_want_all_unsnoopables;
+
+ /** @num_want_all_ipv4: counter for items in want_all_ipv4_list */
atomic_t num_want_all_ipv4;
+
+ /** @num_want_all_ipv6: counter for items in want_all_ipv6_list */
atomic_t num_want_all_ipv6;
- /* protects want_all_{unsnoopables,ipv4,ipv6}_list */
+
+ /**
+ * @want_lists_lock: lock for protecting modifications to mcasts
+ * want_all_{unsnoopables,ipv4,ipv6}_list (traversals are rcu-locked)
+ */
spinlock_t want_lists_lock;
+
+ /** @work: work queue callback item for multicast TT and TVLV updates */
struct delayed_work work;
};
#endif
/**
* struct batadv_priv_nc - per mesh interface network coding private data
- * @work: work queue callback item for cleanup
- * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
- * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq
- * @max_fwd_delay: maximum packet forward delay to allow coding of packets
- * @max_buffer_time: buffer time for sniffed packets used to decoding
- * @timestamp_fwd_flush: timestamp of last forward packet queue flush
- * @timestamp_sniffed_purge: timestamp of last sniffed packet queue purge
- * @coding_hash: Hash table used to buffer skbs while waiting for another
- * incoming skb to code it with. Skbs are added to the buffer just before being
- * forwarded in routing.c
- * @decoding_hash: Hash table used to buffer skbs that might be needed to decode
- * a received coded skb. The buffer is used for 1) skbs arriving on the
- * soft-interface; 2) skbs overheard on the hard-interface; and 3) skbs
- * forwarded by batman-adv.
*/
struct batadv_priv_nc {
+ /** @work: work queue callback item for cleanup */
struct delayed_work work;
+
+ /**
+ * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
+ */
struct dentry *debug_dir;
+
+ /**
+ * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq
+ */
u8 min_tq;
+
+ /**
+ * @max_fwd_delay: maximum packet forward delay to allow coding of
+ * packets
+ */
u32 max_fwd_delay;
+
+ /**
+ * @max_buffer_time: buffer time for sniffed packets used to decoding
+ */
u32 max_buffer_time;
+
+ /**
+ * @timestamp_fwd_flush: timestamp of last forward packet queue flush
+ */
unsigned long timestamp_fwd_flush;
+
+ /**
+ * @timestamp_sniffed_purge: timestamp of last sniffed packet queue
+ * purge
+ */
unsigned long timestamp_sniffed_purge;
+
+ /**
+ * @coding_hash: Hash table used to buffer skbs while waiting for
+ * another incoming skb to code it with. Skbs are added to the buffer
+ * just before being forwarded in routing.c
+ */
struct batadv_hashtable *coding_hash;
+
+ /**
+ * @decoding_hash: Hash table used to buffer skbs that might be needed
+ * to decode a received coded skb. The buffer is used for 1) skbs
+ * arriving on the soft-interface; 2) skbs overheard on the
+ * hard-interface; and 3) skbs forwarded by batman-adv.
+ */
struct batadv_hashtable *decoding_hash;
};
/**
* struct batadv_tp_unacked - unacked packet meta-information
- * @seqno: seqno of the unacked packet
- * @len: length of the packet
- * @list: list node for batadv_tp_vars::unacked_list
*
* This struct is supposed to represent a buffer unacked packet. However, since
* the purpose of the TP meter is to count the traffic only, there is no need to
* store the entire sk_buff, the starting offset and the length are enough
*/
struct batadv_tp_unacked {
+ /** @seqno: seqno of the unacked packet */
u32 seqno;
+
+ /** @len: length of the packet */
u16 len;
+
+ /** @list: list node for &batadv_tp_vars.unacked_list */
struct list_head list;
};
/**
* enum batadv_tp_meter_role - Modus in tp meter session
- * @BATADV_TP_RECEIVER: Initialized as receiver
- * @BATADV_TP_SENDER: Initialized as sender
*/
enum batadv_tp_meter_role {
+ /** @BATADV_TP_RECEIVER: Initialized as receiver */
BATADV_TP_RECEIVER,
+
+ /** @BATADV_TP_SENDER: Initialized as sender */
BATADV_TP_SENDER
};
/**
* struct batadv_tp_vars - tp meter private variables per session
- * @list: list node for bat_priv::tp_list
- * @timer: timer for ack (receiver) and retry (sender)
- * @bat_priv: pointer to the mesh object
- * @start_time: start time in jiffies
- * @other_end: mac address of remote
- * @role: receiver/sender modi
- * @sending: sending binary semaphore: 1 if sending, 0 is not
- * @reason: reason for a stopped session
- * @finish_work: work item for the finishing procedure
- * @test_length: test length in milliseconds
- * @session: TP session identifier
- * @icmp_uid: local ICMP "socket" index
- * @dec_cwnd: decimal part of the cwnd used during linear growth
- * @cwnd: current size of the congestion window
- * @cwnd_lock: lock do protect @cwnd & @dec_cwnd
- * @ss_threshold: Slow Start threshold. Once cwnd exceeds this value the
- * connection switches to the Congestion Avoidance state
- * @last_acked: last acked byte
- * @last_sent: last sent byte, not yet acked
- * @tot_sent: amount of data sent/ACKed so far
- * @dup_acks: duplicate ACKs counter
- * @fast_recovery: true if in Fast Recovery mode
- * @recover: last sent seqno when entering Fast Recovery
- * @rto: sender timeout
- * @srtt: smoothed RTT scaled by 2^3
- * @rttvar: RTT variation scaled by 2^2
- * @more_bytes: waiting queue anchor when waiting for more ack/retry timeout
- * @prerandom_offset: offset inside the prerandom buffer
- * @prerandom_lock: spinlock protecting access to prerandom_offset
- * @last_recv: last in-order received packet
- * @unacked_list: list of unacked packets (meta-info only)
- * @unacked_lock: protect unacked_list
- * @last_recv_time: time time (jiffies) a msg was received
- * @refcount: number of context where the object is used
- * @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_tp_vars {
+ /** @list: list node for &bat_priv.tp_list */
struct hlist_node list;
+
+ /** @timer: timer for ack (receiver) and retry (sender) */
struct timer_list timer;
+
+ /** @bat_priv: pointer to the mesh object */
struct batadv_priv *bat_priv;
+
+ /** @start_time: start time in jiffies */
unsigned long start_time;
+
+ /** @other_end: mac address of remote */
u8 other_end[ETH_ALEN];
+
+ /** @role: receiver/sender modi */
enum batadv_tp_meter_role role;
+
+ /** @sending: sending binary semaphore: 1 if sending, 0 is not */
atomic_t sending;
+
+ /** @reason: reason for a stopped session */
enum batadv_tp_meter_reason reason;
+
+ /** @finish_work: work item for the finishing procedure */
struct delayed_work finish_work;
+
+ /** @test_length: test length in milliseconds */
u32 test_length;
+
+ /** @session: TP session identifier */
u8 session[2];
+
+ /** @icmp_uid: local ICMP "socket" index */
u8 icmp_uid;
/* sender variables */
+
+ /** @dec_cwnd: decimal part of the cwnd used during linear growth */
u16 dec_cwnd;
+
+ /** @cwnd: current size of the congestion window */
u32 cwnd;
- spinlock_t cwnd_lock; /* Protects cwnd & dec_cwnd */
+
+ /** @cwnd_lock: lock do protect @cwnd & @dec_cwnd */
+ spinlock_t cwnd_lock;
+
+ /**
+ * @ss_threshold: Slow Start threshold. Once cwnd exceeds this value the
+ * connection switches to the Congestion Avoidance state
+ */
u32 ss_threshold;
+
+ /** @last_acked: last acked byte */
atomic_t last_acked;
+
+ /** @last_sent: last sent byte, not yet acked */
u32 last_sent;
+
+ /** @tot_sent: amount of data sent/ACKed so far */
atomic64_t tot_sent;
+
+ /** @dup_acks: duplicate ACKs counter */
atomic_t dup_acks;
+
+ /** @fast_recovery: true if in Fast Recovery mode */
bool fast_recovery;
+
+ /** @recover: last sent seqno when entering Fast Recovery */
u32 recover;
+
+ /** @rto: sender timeout */
u32 rto;
+
+ /** @srtt: smoothed RTT scaled by 2^3 */
u32 srtt;
+
+ /** @rttvar: RTT variation scaled by 2^2 */
u32 rttvar;
+
+ /**
+ * @more_bytes: waiting queue anchor when waiting for more ack/retry
+ * timeout
+ */
wait_queue_head_t more_bytes;
+
+ /** @prerandom_offset: offset inside the prerandom buffer */
u32 prerandom_offset;
- spinlock_t prerandom_lock; /* Protects prerandom_offset */
+
+ /** @prerandom_lock: spinlock protecting access to prerandom_offset */
+ spinlock_t prerandom_lock;
/* receiver variables */
+
+ /** @last_recv: last in-order received packet */
u32 last_recv;
+
+ /** @unacked_list: list of unacked packets (meta-info only) */
struct list_head unacked_list;
- spinlock_t unacked_lock; /* Protects unacked_list */
+
+ /** @unacked_lock: protect unacked_list */
+ spinlock_t unacked_lock;
+
+ /** @last_recv_time: time time (jiffies) a msg was received */
unsigned long last_recv_time;
+
+ /** @refcount: number of context where the object is used */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
};
/**
* struct batadv_softif_vlan - per VLAN attributes set
- * @bat_priv: pointer to the mesh object
- * @vid: VLAN identifier
- * @kobj: kobject for sysfs vlan subdirectory
- * @ap_isolation: AP isolation state
- * @tt: TT private attributes (VLAN specific)
- * @list: list node for bat_priv::softif_vlan_list
- * @refcount: number of context where this object is currently in use
- * @rcu: struct used for freeing in a RCU-safe manner
*/
struct batadv_softif_vlan {
+ /** @bat_priv: pointer to the mesh object */
struct batadv_priv *bat_priv;
+
+ /** @vid: VLAN identifier */
unsigned short vid;
+
+ /** @kobj: kobject for sysfs vlan subdirectory */
struct kobject *kobj;
+
+ /** @ap_isolation: AP isolation state */
atomic_t ap_isolation; /* boolean */
+
+ /** @tt: TT private attributes (VLAN specific) */
struct batadv_vlan_tt tt;
+
+ /** @list: list node for &bat_priv.softif_vlan_list */
struct hlist_node list;
+
+ /**
+ * @refcount: number of context where this object is currently in use
+ */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in a RCU-safe manner */
struct rcu_head rcu;
};
/**
* struct batadv_priv_bat_v - B.A.T.M.A.N. V per soft-interface private data
- * @ogm_buff: buffer holding the OGM packet
- * @ogm_buff_len: length of the OGM packet buffer
- * @ogm_seqno: OGM sequence number - used to identify each OGM
- * @ogm_wq: workqueue used to schedule OGM transmissions
*/
struct batadv_priv_bat_v {
+ /** @ogm_buff: buffer holding the OGM packet */
unsigned char *ogm_buff;
+
+ /** @ogm_buff_len: length of the OGM packet buffer */
int ogm_buff_len;
+
+ /** @ogm_seqno: OGM sequence number - used to identify each OGM */
atomic_t ogm_seqno;
+
+ /** @ogm_wq: workqueue used to schedule OGM transmissions */
struct delayed_work ogm_wq;
};
/**
* struct batadv_priv - per mesh interface data
- * @mesh_state: current status of the mesh (inactive/active/deactivating)
- * @soft_iface: net device which holds this struct as private data
- * @bat_counters: mesh internal traffic statistic counters (see batadv_counters)
- * @aggregated_ogms: bool indicating whether OGM aggregation is enabled
- * @bonding: bool indicating whether traffic bonding is enabled
- * @fragmentation: bool indicating whether traffic fragmentation is enabled
- * @packet_size_max: max packet size that can be transmitted via
- * multiple fragmented skbs or a single frame if fragmentation is disabled
- * @frag_seqno: incremental counter to identify chains of egress fragments
- * @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is
- * enabled
- * @distributed_arp_table: bool indicating whether distributed ARP table is
- * enabled
- * @multicast_mode: Enable or disable multicast optimizations on this node's
- * sender/originating side
- * @orig_interval: OGM broadcast interval in milliseconds
- * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop
- * @log_level: configured log level (see batadv_dbg_level)
- * @isolation_mark: the skb->mark value used to match packets for AP isolation
- * @isolation_mark_mask: bitmask identifying the bits in skb->mark to be used
- * for the isolation mark
- * @bcast_seqno: last sent broadcast packet sequence number
- * @bcast_queue_left: number of remaining buffered broadcast packet slots
- * @batman_queue_left: number of remaining OGM packet slots
- * @num_ifaces: number of interfaces assigned to this mesh interface
- * @mesh_obj: kobject for sysfs mesh subdirectory
- * @debug_dir: dentry for debugfs batman-adv subdirectory
- * @forw_bat_list: list of aggregated OGMs that will be forwarded
- * @forw_bcast_list: list of broadcast packets that will be rebroadcasted
- * @tp_list: list of tp sessions
- * @tp_num: number of currently active tp sessions
- * @orig_hash: hash table containing mesh participants (orig nodes)
- * @forw_bat_list_lock: lock protecting forw_bat_list
- * @forw_bcast_list_lock: lock protecting forw_bcast_list
- * @tp_list_lock: spinlock protecting @tp_list
- * @orig_work: work queue callback item for orig node purging
- * @primary_if: one of the hard-interfaces assigned to this mesh interface
- * becomes the primary interface
- * @algo_ops: routing algorithm used by this mesh interface
- * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top
- * of the mesh interface represented by this object
- * @softif_vlan_list_lock: lock protecting softif_vlan_list
- * @bla: bridge loope avoidance data
- * @debug_log: holding debug logging relevant data
- * @gw: gateway data
- * @tt: translation table data
- * @tvlv: type-version-length-value data
- * @dat: distributed arp table data
- * @mcast: multicast data
- * @network_coding: bool indicating whether network coding is enabled
- * @nc: network coding data
- * @bat_v: B.A.T.M.A.N. V per soft-interface private data
*/
struct batadv_priv {
+ /**
+ * @mesh_state: current status of the mesh
+ * (inactive/active/deactivating)
+ */
atomic_t mesh_state;
+
+ /** @soft_iface: net device which holds this struct as private data */
struct net_device *soft_iface;
+
+ /**
+ * @bat_counters: mesh internal traffic statistic counters (see
+ * batadv_counters)
+ */
u64 __percpu *bat_counters; /* Per cpu counters */
+
+ /**
+ * @aggregated_ogms: bool indicating whether OGM aggregation is enabled
+ */
atomic_t aggregated_ogms;
+
+ /** @bonding: bool indicating whether traffic bonding is enabled */
atomic_t bonding;
+
+ /**
+ * @fragmentation: bool indicating whether traffic fragmentation is
+ * enabled
+ */
atomic_t fragmentation;
+
+ /**
+ * @packet_size_max: max packet size that can be transmitted via
+ * multiple fragmented skbs or a single frame if fragmentation is
+ * disabled
+ */
atomic_t packet_size_max;
+
+ /**
+ * @frag_seqno: incremental counter to identify chains of egress
+ * fragments
+ */
atomic_t frag_seqno;
+
#ifdef CONFIG_BATMAN_ADV_BLA
+ /**
+ * @bridge_loop_avoidance: bool indicating whether bridge loop
+ * avoidance is enabled
+ */
atomic_t bridge_loop_avoidance;
#endif
+
#ifdef CONFIG_BATMAN_ADV_DAT
+ /**
+ * @distributed_arp_table: bool indicating whether distributed ARP table
+ * is enabled
+ */
atomic_t distributed_arp_table;
#endif
+
#ifdef CONFIG_BATMAN_ADV_MCAST
+ /**
+ * @multicast_mode: Enable or disable multicast optimizations on this
+ * node's sender/originating side
+ */
atomic_t multicast_mode;
#endif
+
+ /** @orig_interval: OGM broadcast interval in milliseconds */
atomic_t orig_interval;
+
+ /**
+ * @hop_penalty: penalty which will be applied to an OGM's tq-field on
+ * every hop
+ */
atomic_t hop_penalty;
+
#ifdef CONFIG_BATMAN_ADV_DEBUG
+ /** @log_level: configured log level (see batadv_dbg_level) */
atomic_t log_level;
#endif
+
+ /**
+ * @isolation_mark: the skb->mark value used to match packets for AP
+ * isolation
+ */
u32 isolation_mark;
+
+ /**
+ * @isolation_mark_mask: bitmask identifying the bits in skb->mark to be
+ * used for the isolation mark
+ */
u32 isolation_mark_mask;
+
+ /** @bcast_seqno: last sent broadcast packet sequence number */
atomic_t bcast_seqno;
+
+ /**
+ * @bcast_queue_left: number of remaining buffered broadcast packet
+ * slots
+ */
atomic_t bcast_queue_left;
+
+ /** @batman_queue_left: number of remaining OGM packet slots */
atomic_t batman_queue_left;
+
+ /** @num_ifaces: number of interfaces assigned to this mesh interface */
char num_ifaces;
+
+ /** @mesh_obj: kobject for sysfs mesh subdirectory */
struct kobject *mesh_obj;
+
+ /** @debug_dir: dentry for debugfs batman-adv subdirectory */
struct dentry *debug_dir;
+
+ /** @forw_bat_list: list of aggregated OGMs that will be forwarded */
struct hlist_head forw_bat_list;
+
+ /**
+ * @forw_bcast_list: list of broadcast packets that will be
+ * rebroadcasted
+ */
struct hlist_head forw_bcast_list;
+
+ /** @tp_list: list of tp sessions */
struct hlist_head tp_list;
+
+ /** @tp_num: number of currently active tp sessions */
struct batadv_hashtable *orig_hash;
- spinlock_t forw_bat_list_lock; /* protects forw_bat_list */
- spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */
- spinlock_t tp_list_lock; /* protects tp_list */
+
+ /** @orig_hash: hash table containing mesh participants (orig nodes) */
+ spinlock_t forw_bat_list_lock;
+
+ /** @forw_bat_list_lock: lock protecting forw_bat_list */
+ spinlock_t forw_bcast_list_lock;
+
+ /** @forw_bcast_list_lock: lock protecting forw_bcast_list */
+ spinlock_t tp_list_lock;
+
+ /** @tp_list_lock: spinlock protecting @tp_list */
atomic_t tp_num;
+
+ /** @orig_work: work queue callback item for orig node purging */
struct delayed_work orig_work;
+
+ /**
+ * @primary_if: one of the hard-interfaces assigned to this mesh
+ * interface becomes the primary interface
+ */
struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */
+
+ /** @algo_ops: routing algorithm used by this mesh interface */
struct batadv_algo_ops *algo_ops;
+
+ /**
+ * @softif_vlan_list: a list of softif_vlan structs, one per VLAN
+ * created on top of the mesh interface represented by this object
+ */
struct hlist_head softif_vlan_list;
- spinlock_t softif_vlan_list_lock; /* protects softif_vlan_list */
+
+ /** @softif_vlan_list_lock: lock protecting softif_vlan_list */
+ spinlock_t softif_vlan_list_lock;
+
#ifdef CONFIG_BATMAN_ADV_BLA
+ /** @bla: bridge loope avoidance data */
struct batadv_priv_bla bla;
#endif
+
#ifdef CONFIG_BATMAN_ADV_DEBUG
+ /** @debug_log: holding debug logging relevant data */
struct batadv_priv_debug_log *debug_log;
#endif
+
+ /** @gw: gateway data */
struct batadv_priv_gw gw;
+
+ /** @tt: translation table data */
struct batadv_priv_tt tt;
+
+ /** @tvlv: type-version-length-value data */
struct batadv_priv_tvlv tvlv;
+
#ifdef CONFIG_BATMAN_ADV_DAT
+ /** @dat: distributed arp table data */
struct batadv_priv_dat dat;
#endif
+
#ifdef CONFIG_BATMAN_ADV_MCAST
+ /** @mcast: multicast data */
struct batadv_priv_mcast mcast;
#endif
+
#ifdef CONFIG_BATMAN_ADV_NC
+ /**
+ * @network_coding: bool indicating whether network coding is enabled
+ */
atomic_t network_coding;
+
+ /** @nc: network coding data */
struct batadv_priv_nc nc;
#endif /* CONFIG_BATMAN_ADV_NC */
+
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+ /** @bat_v: B.A.T.M.A.N. V per soft-interface private data */
struct batadv_priv_bat_v bat_v;
#endif
};
/**
* struct batadv_socket_client - layer2 icmp socket client data
- * @queue_list: packet queue for packets destined for this socket client
- * @queue_len: number of packets in the packet queue (queue_list)
- * @index: socket client's index in the batadv_socket_client_hash
- * @lock: lock protecting queue_list, queue_len & index
- * @queue_wait: socket client's wait queue
- * @bat_priv: pointer to soft_iface this client belongs to
*/
struct batadv_socket_client {
+ /**
+ * @queue_list: packet queue for packets destined for this socket client
+ */
struct list_head queue_list;
+
+ /** @queue_len: number of packets in the packet queue (queue_list) */
unsigned int queue_len;
+
+ /** @index: socket client's index in the batadv_socket_client_hash */
unsigned char index;
- spinlock_t lock; /* protects queue_list, queue_len & index */
+
+ /** @lock: lock protecting queue_list, queue_len & index */
+ spinlock_t lock;
+
+ /** @queue_wait: socket client's wait queue */
wait_queue_head_t queue_wait;
+
+ /** @bat_priv: pointer to soft_iface this client belongs to */
struct batadv_priv *bat_priv;
};
/**
* struct batadv_socket_packet - layer2 icmp packet for socket client
- * @list: list node for batadv_socket_client::queue_list
- * @icmp_len: size of the layer2 icmp packet
- * @icmp_packet: layer2 icmp packet
*/
struct batadv_socket_packet {
+ /** @list: list node for &batadv_socket_client.queue_list */
struct list_head list;
+
+ /** @icmp_len: size of the layer2 icmp packet */
size_t icmp_len;
+
+ /** @icmp_packet: layer2 icmp packet */
u8 icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE];
};
@@ -1153,312 +1740,432 @@ struct batadv_socket_packet {
/**
* struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN
- * @orig: originator address of backbone node (mac address of primary iface)
- * @vid: vlan id this gateway was detected on
- * @hash_entry: hlist node for batadv_priv_bla::backbone_hash
- * @bat_priv: pointer to soft_iface this backbone gateway belongs to
- * @lasttime: last time we heard of this backbone gw
- * @wait_periods: grace time for bridge forward delays and bla group forming at
- * bootup phase - no bcast traffic is formwared until it has elapsed
- * @request_sent: if this bool is set to true we are out of sync with this
- * backbone gateway - no bcast traffic is formwared until the situation was
- * resolved
- * @crc: crc16 checksum over all claims
- * @crc_lock: lock protecting crc
- * @report_work: work struct for reporting detected loops
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_bla_backbone_gw {
+ /**
+ * @orig: originator address of backbone node (mac address of primary
+ * iface)
+ */
u8 orig[ETH_ALEN];
+
+ /** @vid: vlan id this gateway was detected on */
unsigned short vid;
+
+ /** @hash_entry: hlist node for &batadv_priv_bla.backbone_hash */
struct hlist_node hash_entry;
+
+ /** @bat_priv: pointer to soft_iface this backbone gateway belongs to */
struct batadv_priv *bat_priv;
+
+ /** @lasttime: last time we heard of this backbone gw */
unsigned long lasttime;
+
+ /**
+ * @wait_periods: grace time for bridge forward delays and bla group
+ * forming at bootup phase - no bcast traffic is formwared until it has
+ * elapsed
+ */
atomic_t wait_periods;
+
+ /**
+ * @request_sent: if this bool is set to true we are out of sync with
+ * this backbone gateway - no bcast traffic is formwared until the
+ * situation was resolved
+ */
atomic_t request_sent;
+
+ /** @crc: crc16 checksum over all claims */
u16 crc;
- spinlock_t crc_lock; /* protects crc */
+
+ /** @crc_lock: lock protecting crc */
+ spinlock_t crc_lock;
+
+ /** @report_work: work struct for reporting detected loops */
struct work_struct report_work;
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
};
/**
* struct batadv_bla_claim - claimed non-mesh client structure
- * @addr: mac address of claimed non-mesh client
- * @vid: vlan id this client was detected on
- * @backbone_gw: pointer to backbone gw claiming this client
- * @backbone_lock: lock protecting backbone_gw pointer
- * @lasttime: last time we heard of claim (locals only)
- * @hash_entry: hlist node for batadv_priv_bla::claim_hash
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_bla_claim {
+ /** @addr: mac address of claimed non-mesh client */
u8 addr[ETH_ALEN];
+
+ /** @vid: vlan id this client was detected on */
unsigned short vid;
+
+ /** @backbone_gw: pointer to backbone gw claiming this client */
struct batadv_bla_backbone_gw *backbone_gw;
- spinlock_t backbone_lock; /* protects backbone_gw */
+
+ /** @backbone_lock: lock protecting backbone_gw pointer */
+ spinlock_t backbone_lock;
+
+ /** @lasttime: last time we heard of claim (locals only) */
unsigned long lasttime;
+
+ /** @hash_entry: hlist node for &batadv_priv_bla.claim_hash */
struct hlist_node hash_entry;
+
+ /** @refcount: number of contexts the object is used */
struct rcu_head rcu;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct kref refcount;
};
#endif
/**
* struct batadv_tt_common_entry - tt local & tt global common data
- * @addr: mac address of non-mesh client
- * @vid: VLAN identifier
- * @hash_entry: hlist node for batadv_priv_tt::local_hash or for
- * batadv_priv_tt::global_hash
- * @flags: various state handling flags (see batadv_tt_client_flags)
- * @added_at: timestamp used for purging stale tt common entries
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_tt_common_entry {
+ /** @addr: mac address of non-mesh client */
u8 addr[ETH_ALEN];
+
+ /** @vid: VLAN identifier */
unsigned short vid;
+
+ /**
+ * @hash_entry: hlist node for &batadv_priv_tt.local_hash or for
+ * &batadv_priv_tt.global_hash
+ */
struct hlist_node hash_entry;
+
+ /** @flags: various state handling flags (see batadv_tt_client_flags) */
u16 flags;
+
+ /** @added_at: timestamp used for purging stale tt common entries */
unsigned long added_at;
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
};
/**
* struct batadv_tt_local_entry - translation table local entry data
- * @common: general translation table data
- * @last_seen: timestamp used for purging stale tt local entries
- * @vlan: soft-interface vlan of the entry
*/
struct batadv_tt_local_entry {
+ /** @common: general translation table data */
struct batadv_tt_common_entry common;
+
+ /** @last_seen: timestamp used for purging stale tt local entries */
unsigned long last_seen;
+
+ /** @vlan: soft-interface vlan of the entry */
struct batadv_softif_vlan *vlan;
};
/**
* struct batadv_tt_global_entry - translation table global entry data
- * @common: general translation table data
- * @orig_list: list of orig nodes announcing this non-mesh client
- * @orig_list_count: number of items in the orig_list
- * @list_lock: lock protecting orig_list
- * @roam_at: time at which TT_GLOBAL_ROAM was set
*/
struct batadv_tt_global_entry {
+ /** @common: general translation table data */
struct batadv_tt_common_entry common;
+
+ /** @orig_list: list of orig nodes announcing this non-mesh client */
struct hlist_head orig_list;
+
+ /** @orig_list_count: number of items in the orig_list */
atomic_t orig_list_count;
- spinlock_t list_lock; /* protects orig_list */
+
+ /** @list_lock: lock protecting orig_list */
+ spinlock_t list_lock;
+
+ /** @roam_at: time at which TT_GLOBAL_ROAM was set */
unsigned long roam_at;
};
/**
* struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client
- * @orig_node: pointer to orig node announcing this non-mesh client
- * @ttvn: translation table version number which added the non-mesh client
- * @flags: per orig entry TT sync flags
- * @list: list node for batadv_tt_global_entry::orig_list
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_tt_orig_list_entry {
+ /** @orig_node: pointer to orig node announcing this non-mesh client */
struct batadv_orig_node *orig_node;
+
+ /**
+ * @ttvn: translation table version number which added the non-mesh
+ * client
+ */
u8 ttvn;
+
+ /** @flags: per orig entry TT sync flags */
u8 flags;
+
+ /** @list: list node for &batadv_tt_global_entry.orig_list */
struct hlist_node list;
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
};
/**
* struct batadv_tt_change_node - structure for tt changes occurred
- * @list: list node for batadv_priv_tt::changes_list
- * @change: holds the actual translation table diff data
*/
struct batadv_tt_change_node {
+ /** @list: list node for &batadv_priv_tt.changes_list */
struct list_head list;
+
+ /** @change: holds the actual translation table diff data */
struct batadv_tvlv_tt_change change;
};
/**
* struct batadv_tt_req_node - data to keep track of the tt requests in flight
- * @addr: mac address address of the originator this request was sent to
- * @issued_at: timestamp used for purging stale tt requests
- * @refcount: number of contexts the object is used by
- * @list: list node for batadv_priv_tt::req_list
*/
struct batadv_tt_req_node {
+ /**
+ * @addr: mac address address of the originator this request was sent to
+ */
u8 addr[ETH_ALEN];
+
+ /** @issued_at: timestamp used for purging stale tt requests */
unsigned long issued_at;
+
+ /** @refcount: number of contexts the object is used by */
struct kref refcount;
+
+ /** @list: list node for &batadv_priv_tt.req_list */
struct hlist_node list;
};
/**
* struct batadv_tt_roam_node - roaming client data
- * @addr: mac address of the client in the roaming phase
- * @counter: number of allowed roaming events per client within a single
- * OGM interval (changes are committed with each OGM)
- * @first_time: timestamp used for purging stale roaming node entries
- * @list: list node for batadv_priv_tt::roam_list
*/
struct batadv_tt_roam_node {
+ /** @addr: mac address of the client in the roaming phase */
u8 addr[ETH_ALEN];
+
+ /**
+ * @counter: number of allowed roaming events per client within a single
+ * OGM interval (changes are committed with each OGM)
+ */
atomic_t counter;
+
+ /**
+ * @first_time: timestamp used for purging stale roaming node entries
+ */
unsigned long first_time;
+
+ /** @list: list node for &batadv_priv_tt.roam_list */
struct list_head list;
};
/**
* struct batadv_nc_node - network coding node
- * @list: next and prev pointer for the list handling
- * @addr: the node's mac address
- * @refcount: number of contexts the object is used by
- * @rcu: struct used for freeing in an RCU-safe manner
- * @orig_node: pointer to corresponding orig node struct
- * @last_seen: timestamp of last ogm received from this node
*/
struct batadv_nc_node {
+ /** @list: next and prev pointer for the list handling */
struct list_head list;
+
+ /** @addr: the node's mac address */
u8 addr[ETH_ALEN];
+
+ /** @refcount: number of contexts the object is used by */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
+
+ /** @orig_node: pointer to corresponding orig node struct */
struct batadv_orig_node *orig_node;
+
+ /** @last_seen: timestamp of last ogm received from this node */
unsigned long last_seen;
};
/**
* struct batadv_nc_path - network coding path
- * @hash_entry: next and prev pointer for the list handling
- * @rcu: struct used for freeing in an RCU-safe manner
- * @refcount: number of contexts the object is used by
- * @packet_list: list of buffered packets for this path
- * @packet_list_lock: access lock for packet list
- * @next_hop: next hop (destination) of path
- * @prev_hop: previous hop (source) of path
- * @last_valid: timestamp for last validation of path
*/
struct batadv_nc_path {
+ /** @hash_entry: next and prev pointer for the list handling */
struct hlist_node hash_entry;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
+
+ /** @refcount: number of contexts the object is used by */
struct kref refcount;
+
+ /** @packet_list: list of buffered packets for this path */
struct list_head packet_list;
- spinlock_t packet_list_lock; /* Protects packet_list */
+
+ /** @packet_list_lock: access lock for packet list */
+ spinlock_t packet_list_lock;
+
+ /** @next_hop: next hop (destination) of path */
u8 next_hop[ETH_ALEN];
+
+ /** @prev_hop: previous hop (source) of path */
u8 prev_hop[ETH_ALEN];
+
+ /** @last_valid: timestamp for last validation of path */
unsigned long last_valid;
};
/**
* struct batadv_nc_packet - network coding packet used when coding and
* decoding packets
- * @list: next and prev pointer for the list handling
- * @packet_id: crc32 checksum of skb data
- * @timestamp: field containing the info when the packet was added to path
- * @neigh_node: pointer to original next hop neighbor of skb
- * @skb: skb which can be encoded or used for decoding
- * @nc_path: pointer to path this nc packet is attached to
*/
struct batadv_nc_packet {
+ /** @list: next and prev pointer for the list handling */
struct list_head list;
+
+ /** @packet_id: crc32 checksum of skb data */
__be32 packet_id;
+
+ /**
+ * @timestamp: field containing the info when the packet was added to
+ * path
+ */
unsigned long timestamp;
+
+ /** @neigh_node: pointer to original next hop neighbor of skb */
struct batadv_neigh_node *neigh_node;
+
+ /** @skb: skb which can be encoded or used for decoding */
struct sk_buff *skb;
+
+ /** @nc_path: pointer to path this nc packet is attached to */
struct batadv_nc_path *nc_path;
};
/**
* struct batadv_skb_cb - control buffer structure used to store private data
* relevant to batman-adv in the skb->cb buffer in skbs.
- * @decoded: Marks a skb as decoded, which is checked when searching for coding
- * opportunities in network-coding.c
- * @num_bcasts: Counter for broadcast packet retransmissions
*/
struct batadv_skb_cb {
+ /**
+ * @decoded: Marks a skb as decoded, which is checked when searching for
+ * coding opportunities in network-coding.c
+ */
bool decoded;
+
+ /** @num_bcasts: Counter for broadcast packet retransmissions */
unsigned int num_bcasts;
};
/**
* struct batadv_forw_packet - structure for bcast packets to be sent/forwarded
- * @list: list node for batadv_priv::forw_{bat,bcast}_list
- * @cleanup_list: list node for purging functions
- * @send_time: execution time for delayed_work (packet sending)
- * @own: bool for locally generated packets (local OGMs are re-scheduled after
- * sending)
- * @skb: bcast packet's skb buffer
- * @packet_len: size of aggregated OGM packet inside the skb buffer
- * @direct_link_flags: direct link flags for aggregated OGM packets
- * @num_packets: counter for aggregated OGMv1 packets
- * @delayed_work: work queue callback item for packet sending
- * @if_incoming: pointer to incoming hard-iface or primary iface if
- * locally generated packet
- * @if_outgoing: packet where the packet should be sent to, or NULL if
- * unspecified
- * @queue_left: The queue (counter) this packet was applied to
*/
struct batadv_forw_packet {
+ /**
+ * @list: list node for &batadv_priv.forw.bcast_list and
+ * &batadv_priv.forw.bat_list
+ */
struct hlist_node list;
+
+ /** @cleanup_list: list node for purging functions */
struct hlist_node cleanup_list;
+
+ /** @send_time: execution time for delayed_work (packet sending) */
unsigned long send_time;
+
+ /**
+ * @own: bool for locally generated packets (local OGMs are re-scheduled
+ * after sending)
+ */
u8 own;
+
+ /** @skb: bcast packet's skb buffer */
struct sk_buff *skb;
+
+ /** @packet_len: size of aggregated OGM packet inside the skb buffer */
u16 packet_len;
+
+ /** @direct_link_flags: direct link flags for aggregated OGM packets */
u32 direct_link_flags;
+
+ /** @num_packets: counter for aggregated OGMv1 packets */
u8 num_packets;
+
+ /** @delayed_work: work queue callback item for packet sending */
struct delayed_work delayed_work;
+
+ /**
+ * @if_incoming: pointer to incoming hard-iface or primary iface if
+ * locally generated packet
+ */
struct batadv_hard_iface *if_incoming;
+
+ /**
+ * @if_outgoing: packet where the packet should be sent to, or NULL if
+ * unspecified
+ */
struct batadv_hard_iface *if_outgoing;
+
+ /** @queue_left: The queue (counter) this packet was applied to */
atomic_t *queue_left;
};
/**
* struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific)
- * @activate: start routing mechanisms when hard-interface is brought up
- * (optional)
- * @enable: init routing info when hard-interface is enabled
- * @disable: de-init routing info when hard-interface is disabled
- * @update_mac: (re-)init mac addresses of the protocol information
- * belonging to this hard-interface
- * @primary_set: called when primary interface is selected / changed
*/
struct batadv_algo_iface_ops {
+ /**
+ * @activate: start routing mechanisms when hard-interface is brought up
+ * (optional)
+ */
void (*activate)(struct batadv_hard_iface *hard_iface);
+
+ /** @enable: init routing info when hard-interface is enabled */
int (*enable)(struct batadv_hard_iface *hard_iface);
+
+ /** @disable: de-init routing info when hard-interface is disabled */
void (*disable)(struct batadv_hard_iface *hard_iface);
+
+ /**
+ * @update_mac: (re-)init mac addresses of the protocol information
+ * belonging to this hard-interface
+ */
void (*update_mac)(struct batadv_hard_iface *hard_iface);
+
+ /** @primary_set: called when primary interface is selected / changed */
void (*primary_set)(struct batadv_hard_iface *hard_iface);
};
/**
* struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific)
- * @hardif_init: called on creation of single hop entry
- * (optional)
- * @cmp: compare the metrics of two neighbors for their respective outgoing
- * interfaces
- * @is_similar_or_better: check if neigh1 is equally similar or better than
- * neigh2 for their respective outgoing interface from the metric prospective
- * @print: print the single hop neighbor list (optional)
- * @dump: dump neighbors to a netlink socket (optional)
*/
struct batadv_algo_neigh_ops {
+ /** @hardif_init: called on creation of single hop entry (optional) */
void (*hardif_init)(struct batadv_hardif_neigh_node *neigh);
+
+ /**
+ * @cmp: compare the metrics of two neighbors for their respective
+ * outgoing interfaces
+ */
int (*cmp)(struct batadv_neigh_node *neigh1,
struct batadv_hard_iface *if_outgoing1,
struct batadv_neigh_node *neigh2,
struct batadv_hard_iface *if_outgoing2);
+
+ /**
+ * @is_similar_or_better: check if neigh1 is equally similar or better
+ * than neigh2 for their respective outgoing interface from the metric
+ * prospective
+ */
bool (*is_similar_or_better)(struct batadv_neigh_node *neigh1,
struct batadv_hard_iface *if_outgoing1,
struct batadv_neigh_node *neigh2,
struct batadv_hard_iface *if_outgoing2);
+
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+ /** @print: print the single hop neighbor list (optional) */
void (*print)(struct batadv_priv *priv, struct seq_file *seq);
#endif
+
+ /** @dump: dump neighbors to a netlink socket (optional) */
void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
struct batadv_priv *priv,
struct batadv_hard_iface *hard_iface);
@@ -1466,24 +2173,36 @@ struct batadv_algo_neigh_ops {
/**
* struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific)
- * @free: free the resources allocated by the routing algorithm for an orig_node
- * object (optional)
- * @add_if: ask the routing algorithm to apply the needed changes to the
- * orig_node due to a new hard-interface being added into the mesh (optional)
- * @del_if: ask the routing algorithm to apply the needed changes to the
- * orig_node due to an hard-interface being removed from the mesh (optional)
- * @print: print the originator table (optional)
- * @dump: dump originators to a netlink socket (optional)
*/
struct batadv_algo_orig_ops {
+ /**
+ * @free: free the resources allocated by the routing algorithm for an
+ * orig_node object (optional)
+ */
void (*free)(struct batadv_orig_node *orig_node);
+
+ /**
+ * @add_if: ask the routing algorithm to apply the needed changes to the
+ * orig_node due to a new hard-interface being added into the mesh
+ * (optional)
+ */
int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
+
+ /**
+ * @del_if: ask the routing algorithm to apply the needed changes to the
+ * orig_node due to an hard-interface being removed from the mesh
+ * (optional)
+ */
int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
int del_if_num);
+
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+ /** @print: print the originator table (optional) */
void (*print)(struct batadv_priv *priv, struct seq_file *seq,
struct batadv_hard_iface *hard_iface);
#endif
+
+ /** @dump: dump originators to a netlink socket (optional) */
void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
struct batadv_priv *priv,
struct batadv_hard_iface *hard_iface);
@@ -1491,158 +2210,213 @@ struct batadv_algo_orig_ops {
/**
* struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific)
- * @init_sel_class: initialize GW selection class (optional)
- * @store_sel_class: parse and stores a new GW selection class (optional)
- * @show_sel_class: prints the current GW selection class (optional)
- * @get_best_gw_node: select the best GW from the list of available nodes
- * (optional)
- * @is_eligible: check if a newly discovered GW is a potential candidate for
- * the election as best GW (optional)
- * @print: print the gateway table (optional)
- * @dump: dump gateways to a netlink socket (optional)
*/
struct batadv_algo_gw_ops {
+ /** @init_sel_class: initialize GW selection class (optional) */
void (*init_sel_class)(struct batadv_priv *bat_priv);
+
+ /**
+ * @store_sel_class: parse and stores a new GW selection class
+ * (optional)
+ */
ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
size_t count);
+
+ /** @show_sel_class: prints the current GW selection class (optional) */
ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff);
+
+ /**
+ * @get_best_gw_node: select the best GW from the list of available
+ * nodes (optional)
+ */
struct batadv_gw_node *(*get_best_gw_node)
(struct batadv_priv *bat_priv);
+
+ /**
+ * @is_eligible: check if a newly discovered GW is a potential candidate
+ * for the election as best GW (optional)
+ */
bool (*is_eligible)(struct batadv_priv *bat_priv,
struct batadv_orig_node *curr_gw_orig,
struct batadv_orig_node *orig_node);
+
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+ /** @print: print the gateway table (optional) */
void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq);
#endif
+
+ /** @dump: dump gateways to a netlink socket (optional) */
void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
struct batadv_priv *priv);
};
/**
* struct batadv_algo_ops - mesh algorithm callbacks
- * @list: list node for the batadv_algo_list
- * @name: name of the algorithm
- * @iface: callbacks related to interface handling
- * @neigh: callbacks related to neighbors handling
- * @orig: callbacks related to originators handling
- * @gw: callbacks related to GW mode
*/
struct batadv_algo_ops {
+ /** @list: list node for the batadv_algo_list */
struct hlist_node list;
+
+ /** @name: name of the algorithm */
char *name;
+
+ /** @iface: callbacks related to interface handling */
struct batadv_algo_iface_ops iface;
+
+ /** @neigh: callbacks related to neighbors handling */
struct batadv_algo_neigh_ops neigh;
+
+ /** @orig: callbacks related to originators handling */
struct batadv_algo_orig_ops orig;
+
+ /** @gw: callbacks related to GW mode */
struct batadv_algo_gw_ops gw;
};
/**
* struct batadv_dat_entry - it is a single entry of batman-adv ARP backend. It
* is used to stored ARP entries needed for the global DAT cache
- * @ip: the IPv4 corresponding to this DAT/ARP entry
- * @mac_addr: the MAC address associated to the stored IPv4
- * @vid: the vlan ID associated to this entry
- * @last_update: time in jiffies when this entry was refreshed last time
- * @hash_entry: hlist node for batadv_priv_dat::hash
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_dat_entry {
+ /** @ip: the IPv4 corresponding to this DAT/ARP entry */
__be32 ip;
+
+ /** @mac_addr: the MAC address associated to the stored IPv4 */
u8 mac_addr[ETH_ALEN];
+
+ /** @vid: the vlan ID associated to this entry */
unsigned short vid;
+
+ /**
+ * @last_update: time in jiffies when this entry was refreshed last time
+ */
unsigned long last_update;
+
+ /** @hash_entry: hlist node for &batadv_priv_dat.hash */
struct hlist_node hash_entry;
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
};
/**
* struct batadv_hw_addr - a list entry for a MAC address
- * @list: list node for the linking of entries
- * @addr: the MAC address of this list entry
*/
struct batadv_hw_addr {
+ /** @list: list node for the linking of entries */
struct hlist_node list;
+
+ /** @addr: the MAC address of this list entry */
unsigned char addr[ETH_ALEN];
};
/**
* struct batadv_dat_candidate - candidate destination for DAT operations
- * @type: the type of the selected candidate. It can one of the following:
- * - BATADV_DAT_CANDIDATE_NOT_FOUND
- * - BATADV_DAT_CANDIDATE_ORIG
- * @orig_node: if type is BATADV_DAT_CANDIDATE_ORIG this field points to the
- * corresponding originator node structure
*/
struct batadv_dat_candidate {
+ /**
+ * @type: the type of the selected candidate. It can one of the
+ * following:
+ * - BATADV_DAT_CANDIDATE_NOT_FOUND
+ * - BATADV_DAT_CANDIDATE_ORIG
+ */
int type;
+
+ /**
+ * @orig_node: if type is BATADV_DAT_CANDIDATE_ORIG this field points to
+ * the corresponding originator node structure
+ */
struct batadv_orig_node *orig_node;
};
/**
* struct batadv_tvlv_container - container for tvlv appended to OGMs
- * @list: hlist node for batadv_priv_tvlv::container_list
- * @tvlv_hdr: tvlv header information needed to construct the tvlv
- * @refcount: number of contexts the object is used
*/
struct batadv_tvlv_container {
+ /** @list: hlist node for &batadv_priv_tvlv.container_list */
struct hlist_node list;
+
+ /** @tvlv_hdr: tvlv header information needed to construct the tvlv */
struct batadv_tvlv_hdr tvlv_hdr;
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
};
/**
* struct batadv_tvlv_handler - handler for specific tvlv type and version
- * @list: hlist node for batadv_priv_tvlv::handler_list
- * @ogm_handler: handler callback which is given the tvlv payload to process on
- * incoming OGM packets
- * @unicast_handler: handler callback which is given the tvlv payload to process
- * on incoming unicast tvlv packets
- * @type: tvlv type this handler feels responsible for
- * @version: tvlv version this handler feels responsible for
- * @flags: tvlv handler flags
- * @refcount: number of contexts the object is used
- * @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_tvlv_handler {
+ /** @list: hlist node for &batadv_priv_tvlv.handler_list */
struct hlist_node list;
+
+ /**
+ * @ogm_handler: handler callback which is given the tvlv payload to
+ * process on incoming OGM packets
+ */
void (*ogm_handler)(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
u8 flags, void *tvlv_value, u16 tvlv_value_len);
+
+ /**
+ * @unicast_handler: handler callback which is given the tvlv payload to
+ * process on incoming unicast tvlv packets
+ */
int (*unicast_handler)(struct batadv_priv *bat_priv,
u8 *src, u8 *dst,
void *tvlv_value, u16 tvlv_value_len);
+
+ /** @type: tvlv type this handler feels responsible for */
u8 type;
+
+ /** @version: tvlv version this handler feels responsible for */
u8 version;
+
+ /** @flags: tvlv handler flags */
u8 flags;
+
+ /** @refcount: number of contexts the object is used */
struct kref refcount;
+
+ /** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
};
/**
* enum batadv_tvlv_handler_flags - tvlv handler flags definitions
- * @BATADV_TVLV_HANDLER_OGM_CIFNOTFND: tvlv ogm processing function will call
- * this handler even if its type was not found (with no data)
- * @BATADV_TVLV_HANDLER_OGM_CALLED: interval tvlv handling flag - the API marks
- * a handler as being called, so it won't be called if the
- * BATADV_TVLV_HANDLER_OGM_CIFNOTFND flag was set
*/
enum batadv_tvlv_handler_flags {
+ /**
+ * @BATADV_TVLV_HANDLER_OGM_CIFNOTFND: tvlv ogm processing function
+ * will call this handler even if its type was not found (with no data)
+ */
BATADV_TVLV_HANDLER_OGM_CIFNOTFND = BIT(1),
+
+ /**
+ * @BATADV_TVLV_HANDLER_OGM_CALLED: interval tvlv handling flag - the
+ * API marks a handler as being called, so it won't be called if the
+ * BATADV_TVLV_HANDLER_OGM_CIFNOTFND flag was set
+ */
BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2),
};
/**
* struct batadv_store_mesh_work - Work queue item to detach add/del interface
* from sysfs locks
- * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
- * @soft_iface_name: name of soft-interface to modify
- * @work: work queue item
*/
struct batadv_store_mesh_work {
+ /**
+ * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
+ */
struct net_device *net_dev;
+
+ /** @soft_iface_name: name of soft-interface to modify */
char soft_iface_name[IFNAMSIZ];
+
+ /** @work: work queue item */
struct work_struct work;
};
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 91e3ba280706..f897681780db 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -421,7 +421,7 @@ out:
}
EXPORT_SYMBOL(bt_sock_stream_recvmsg);
-static inline unsigned int bt_accept_poll(struct sock *parent)
+static inline __poll_t bt_accept_poll(struct sock *parent)
{
struct bt_sock *s, *n;
struct sock *sk;
@@ -437,11 +437,11 @@ static inline unsigned int bt_accept_poll(struct sock *parent)
return 0;
}
-unsigned int bt_sock_poll(struct file *file, struct socket *sock,
+__poll_t bt_sock_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask = 0;
+ __poll_t mask = 0;
BT_DBG("sock %p, sk %p", sock, sk);
@@ -766,43 +766,39 @@ static int __init bt_init(void)
return err;
err = sock_register(&bt_sock_family_ops);
- if (err < 0) {
- bt_sysfs_cleanup();
- return err;
- }
+ if (err)
+ goto cleanup_sysfs;
BT_INFO("HCI device and connection manager initialized");
err = hci_sock_init();
- if (err < 0)
- goto error;
+ if (err)
+ goto unregister_socket;
err = l2cap_init();
- if (err < 0)
- goto sock_err;
+ if (err)
+ goto cleanup_socket;
err = sco_init();
- if (err < 0) {
- l2cap_exit();
- goto sock_err;
- }
+ if (err)
+ goto cleanup_cap;
err = mgmt_init();
- if (err < 0) {
- sco_exit();
- l2cap_exit();
- goto sock_err;
- }
+ if (err)
+ goto cleanup_sco;
return 0;
-sock_err:
+cleanup_sco:
+ sco_exit();
+cleanup_cap:
+ l2cap_exit();
+cleanup_socket:
hci_sock_cleanup();
-
-error:
+unregister_socket:
sock_unregister(PF_BLUETOOTH);
+cleanup_sysfs:
bt_sysfs_cleanup();
-
return err;
}
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index bb308224099c..426a92f02db4 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -527,7 +527,6 @@ static int cmtp_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations cmtp_proc_fops = {
- .owner = THIS_MODULE,
.open = cmtp_proc_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 63df63ebfb24..57403bd567d0 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -88,6 +88,9 @@ static int __name ## _show(struct seq_file *f, void *ptr) \
return 0; \
} \
\
+DEFINE_SHOW_ATTRIBUTE(__name)
+
+#define DEFINE_SHOW_ATTRIBUTE(__name) \
static int __name ## _open(struct inode *inode, struct file *file) \
{ \
return single_open(file, __name ## _show, inode->i_private); \
@@ -106,37 +109,16 @@ static int features_show(struct seq_file *f, void *ptr)
u8 p;
hci_dev_lock(hdev);
- for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) {
- seq_printf(f, "%2u: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x "
- "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", p,
- hdev->features[p][0], hdev->features[p][1],
- hdev->features[p][2], hdev->features[p][3],
- hdev->features[p][4], hdev->features[p][5],
- hdev->features[p][6], hdev->features[p][7]);
- }
+ for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++)
+ seq_printf(f, "%2u: %8ph\n", p, hdev->features[p]);
if (lmp_le_capable(hdev))
- seq_printf(f, "LE: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x "
- "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n",
- hdev->le_features[0], hdev->le_features[1],
- hdev->le_features[2], hdev->le_features[3],
- hdev->le_features[4], hdev->le_features[5],
- hdev->le_features[6], hdev->le_features[7]);
+ seq_printf(f, "LE: %8ph\n", hdev->le_features);
hci_dev_unlock(hdev);
return 0;
}
-static int features_open(struct inode *inode, struct file *file)
-{
- return single_open(file, features_show, inode->i_private);
-}
-
-static const struct file_operations features_fops = {
- .open = features_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(features);
static int device_id_show(struct seq_file *f, void *ptr)
{
@@ -150,17 +132,7 @@ static int device_id_show(struct seq_file *f, void *ptr)
return 0;
}
-static int device_id_open(struct inode *inode, struct file *file)
-{
- return single_open(file, device_id_show, inode->i_private);
-}
-
-static const struct file_operations device_id_fops = {
- .open = device_id_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(device_id);
static int device_list_show(struct seq_file *f, void *ptr)
{
@@ -180,17 +152,7 @@ static int device_list_show(struct seq_file *f, void *ptr)
return 0;
}
-static int device_list_open(struct inode *inode, struct file *file)
-{
- return single_open(file, device_list_show, inode->i_private);
-}
-
-static const struct file_operations device_list_fops = {
- .open = device_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(device_list);
static int blacklist_show(struct seq_file *f, void *p)
{
@@ -205,17 +167,7 @@ static int blacklist_show(struct seq_file *f, void *p)
return 0;
}
-static int blacklist_open(struct inode *inode, struct file *file)
-{
- return single_open(file, blacklist_show, inode->i_private);
-}
-
-static const struct file_operations blacklist_fops = {
- .open = blacklist_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(blacklist);
static int uuids_show(struct seq_file *f, void *p)
{
@@ -240,17 +192,7 @@ static int uuids_show(struct seq_file *f, void *p)
return 0;
}
-static int uuids_open(struct inode *inode, struct file *file)
-{
- return single_open(file, uuids_show, inode->i_private);
-}
-
-static const struct file_operations uuids_fops = {
- .open = uuids_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(uuids);
static int remote_oob_show(struct seq_file *f, void *ptr)
{
@@ -269,17 +211,7 @@ static int remote_oob_show(struct seq_file *f, void *ptr)
return 0;
}
-static int remote_oob_open(struct inode *inode, struct file *file)
-{
- return single_open(file, remote_oob_show, inode->i_private);
-}
-
-static const struct file_operations remote_oob_fops = {
- .open = remote_oob_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(remote_oob);
static int conn_info_min_age_set(void *data, u64 val)
{
@@ -443,17 +375,7 @@ static int inquiry_cache_show(struct seq_file *f, void *p)
return 0;
}
-static int inquiry_cache_open(struct inode *inode, struct file *file)
-{
- return single_open(file, inquiry_cache_show, inode->i_private);
-}
-
-static const struct file_operations inquiry_cache_fops = {
- .open = inquiry_cache_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(inquiry_cache);
static int link_keys_show(struct seq_file *f, void *ptr)
{
@@ -469,17 +391,7 @@ static int link_keys_show(struct seq_file *f, void *ptr)
return 0;
}
-static int link_keys_open(struct inode *inode, struct file *file)
-{
- return single_open(file, link_keys_show, inode->i_private);
-}
-
-static const struct file_operations link_keys_fops = {
- .open = link_keys_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(link_keys);
static int dev_class_show(struct seq_file *f, void *ptr)
{
@@ -493,17 +405,7 @@ static int dev_class_show(struct seq_file *f, void *ptr)
return 0;
}
-static int dev_class_open(struct inode *inode, struct file *file)
-{
- return single_open(file, dev_class_show, inode->i_private);
-}
-
-static const struct file_operations dev_class_fops = {
- .open = dev_class_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(dev_class);
static int voice_setting_get(void *data, u64 *val)
{
@@ -692,17 +594,7 @@ static int identity_show(struct seq_file *f, void *p)
return 0;
}
-static int identity_open(struct inode *inode, struct file *file)
-{
- return single_open(file, identity_show, inode->i_private);
-}
-
-static const struct file_operations identity_fops = {
- .open = identity_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(identity);
static int rpa_timeout_set(void *data, u64 val)
{
@@ -746,17 +638,7 @@ static int random_address_show(struct seq_file *f, void *p)
return 0;
}
-static int random_address_open(struct inode *inode, struct file *file)
-{
- return single_open(file, random_address_show, inode->i_private);
-}
-
-static const struct file_operations random_address_fops = {
- .open = random_address_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(random_address);
static int static_address_show(struct seq_file *f, void *p)
{
@@ -769,17 +651,7 @@ static int static_address_show(struct seq_file *f, void *p)
return 0;
}
-static int static_address_open(struct inode *inode, struct file *file)
-{
- return single_open(file, static_address_show, inode->i_private);
-}
-
-static const struct file_operations static_address_fops = {
- .open = static_address_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(static_address);
static ssize_t force_static_address_read(struct file *file,
char __user *user_buf,
@@ -841,17 +713,7 @@ static int white_list_show(struct seq_file *f, void *ptr)
return 0;
}
-static int white_list_open(struct inode *inode, struct file *file)
-{
- return single_open(file, white_list_show, inode->i_private);
-}
-
-static const struct file_operations white_list_fops = {
- .open = white_list_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(white_list);
static int identity_resolving_keys_show(struct seq_file *f, void *ptr)
{
@@ -869,18 +731,7 @@ static int identity_resolving_keys_show(struct seq_file *f, void *ptr)
return 0;
}
-static int identity_resolving_keys_open(struct inode *inode, struct file *file)
-{
- return single_open(file, identity_resolving_keys_show,
- inode->i_private);
-}
-
-static const struct file_operations identity_resolving_keys_fops = {
- .open = identity_resolving_keys_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(identity_resolving_keys);
static int long_term_keys_show(struct seq_file *f, void *ptr)
{
@@ -898,17 +749,7 @@ static int long_term_keys_show(struct seq_file *f, void *ptr)
return 0;
}
-static int long_term_keys_open(struct inode *inode, struct file *file)
-{
- return single_open(file, long_term_keys_show, inode->i_private);
-}
-
-static const struct file_operations long_term_keys_fops = {
- .open = long_term_keys_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(long_term_keys);
static int conn_min_interval_set(void *data, u64 val)
{
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index abc0f3224dd1..3394e6791673 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -919,6 +919,43 @@ static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags)
return true;
}
+static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
+{
+ /* If there is no connection we are OK to advertise. */
+ if (hci_conn_num(hdev, LE_LINK) == 0)
+ return true;
+
+ /* Check le_states if there is any connection in slave role. */
+ if (hdev->conn_hash.le_num_slave > 0) {
+ /* Slave connection state and non connectable mode bit 20. */
+ if (!connectable && !(hdev->le_states[2] & 0x10))
+ return false;
+
+ /* Slave connection state and connectable mode bit 38
+ * and scannable bit 21.
+ */
+ if (connectable && (!(hdev->le_states[4] & 0x01) ||
+ !(hdev->le_states[2] & 0x40)))
+ return false;
+ }
+
+ /* Check le_states if there is any connection in master role. */
+ if (hci_conn_num(hdev, LE_LINK) != hdev->conn_hash.le_num_slave) {
+ /* Master connection state and non connectable mode bit 18. */
+ if (!connectable && !(hdev->le_states[2] & 0x02))
+ return false;
+
+ /* Master connection state and connectable mode bit 35 and
+ * scannable 19.
+ */
+ if (connectable && (!(hdev->le_states[4] & 0x10) ||
+ !(hdev->le_states[2] & 0x08)))
+ return false;
+ }
+
+ return true;
+}
+
void __hci_req_enable_advertising(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
@@ -927,7 +964,15 @@ void __hci_req_enable_advertising(struct hci_request *req)
bool connectable;
u32 flags;
- if (hci_conn_num(hdev, LE_LINK) > 0)
+ flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance);
+
+ /* If the "connectable" instance flag was not set, then choose between
+ * ADV_IND and ADV_NONCONN_IND based on the global connectable setting.
+ */
+ connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) ||
+ mgmt_get_connectable(hdev);
+
+ if (!is_advertising_allowed(hdev, connectable))
return;
if (hci_dev_test_flag(hdev, HCI_LE_ADV))
@@ -940,14 +985,6 @@ void __hci_req_enable_advertising(struct hci_request *req)
*/
hci_dev_clear_flag(hdev, HCI_LE_ADV);
- flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance);
-
- /* If the "connectable" instance flag was not set, then choose between
- * ADV_IND and ADV_NONCONN_IND based on the global connectable setting.
- */
- connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) ||
- mgmt_get_connectable(hdev);
-
/* Set require_privacy to true only when non-connectable
* advertising is used. In that case it is fine to use a
* non-resolvable private address.
@@ -1985,13 +2022,6 @@ unlock:
hci_dev_unlock(hdev);
}
-static void disable_advertising(struct hci_request *req)
-{
- u8 enable = 0x00;
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
-}
-
static int active_scan(struct hci_request *req, unsigned long opt)
{
uint16_t interval = opt;
@@ -2017,7 +2047,7 @@ static int active_scan(struct hci_request *req, unsigned long opt)
cancel_adv_timeout(hdev);
hci_dev_unlock(hdev);
- disable_advertising(req);
+ __hci_req_disable_advertising(req);
}
/* If controller is scanning, it means the background scanning is
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index f2cec70d520c..1036e4fa1ea2 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -789,7 +789,7 @@ static int hidp_setup_hid(struct hidp_session *session,
hid->dev.parent = &session->conn->hcon->dev;
hid->ll_driver = &hidp_hid_driver;
- /* True if device is blacklisted in drivers/hid/hid-core.c */
+ /* True if device is blacklisted in drivers/hid/hid-quirks.c */
if (hid_ignore(hid)) {
hid_destroy_device(session->hid);
session->hid = NULL;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 43ba91c440bc..fc6615d59165 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3363,9 +3363,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
break;
case L2CAP_CONF_EFS:
- remote_efs = 1;
- if (olen == sizeof(efs))
+ if (olen == sizeof(efs)) {
+ remote_efs = 1;
memcpy(&efs, (void *) val, olen);
+ }
break;
case L2CAP_CONF_EWS:
@@ -3584,16 +3585,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
break;
case L2CAP_CONF_EFS:
- if (olen == sizeof(efs))
+ if (olen == sizeof(efs)) {
memcpy(&efs, (void *)val, olen);
- if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
- efs.stype != L2CAP_SERV_NOTRAFIC &&
- efs.stype != chan->local_stype)
- return -ECONNREFUSED;
+ if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
+ efs.stype != L2CAP_SERV_NOTRAFIC &&
+ efs.stype != chan->local_stype)
+ return -ECONNREFUSED;
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
- (unsigned long) &efs, endptr - ptr);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
+ (unsigned long) &efs, endptr - ptr);
+ }
break;
case L2CAP_CONF_FCS:
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index af5b8c87f590..1285ca30ab0a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -125,9 +125,16 @@ static int br_dev_init(struct net_device *dev)
if (!br->stats)
return -ENOMEM;
+ err = br_fdb_hash_init(br);
+ if (err) {
+ free_percpu(br->stats);
+ return err;
+ }
+
err = br_vlan_init(br);
if (err) {
free_percpu(br->stats);
+ br_fdb_hash_fini(br);
return err;
}
@@ -135,6 +142,7 @@ static int br_dev_init(struct net_device *dev)
if (err) {
free_percpu(br->stats);
br_vlan_flush(br);
+ br_fdb_hash_fini(br);
}
br_set_lockdep_class(dev);
@@ -148,6 +156,7 @@ static void br_dev_uninit(struct net_device *dev)
br_multicast_dev_del(br);
br_multicast_uninit_stats(br);
br_vlan_flush(br);
+ br_fdb_hash_fini(br);
free_percpu(br->stats);
}
@@ -416,6 +425,7 @@ void br_dev_setup(struct net_device *dev)
br->dev = dev;
spin_lock_init(&br->lock);
INIT_LIST_HEAD(&br->port_list);
+ INIT_HLIST_HEAD(&br->fdb_list);
spin_lock_init(&br->hash_lock);
br->bridge_id.prio[0] = 0x80;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 4ea5c8bbe286..d9e69e4514be 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -28,14 +28,20 @@
#include <trace/events/bridge.h>
#include "br_private.h"
+static const struct rhashtable_params br_fdb_rht_params = {
+ .head_offset = offsetof(struct net_bridge_fdb_entry, rhnode),
+ .key_offset = offsetof(struct net_bridge_fdb_entry, key),
+ .key_len = sizeof(struct net_bridge_fdb_key),
+ .automatic_shrinking = true,
+ .locks_mul = 1,
+};
+
static struct kmem_cache *br_fdb_cache __read_mostly;
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid);
static void fdb_notify(struct net_bridge *br,
const struct net_bridge_fdb_entry *, int);
-static u32 fdb_salt __read_mostly;
-
int __init br_fdb_init(void)
{
br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
@@ -45,7 +51,6 @@ int __init br_fdb_init(void)
if (!br_fdb_cache)
return -ENOMEM;
- get_random_bytes(&fdb_salt, sizeof(fdb_salt));
return 0;
}
@@ -54,6 +59,15 @@ void br_fdb_fini(void)
kmem_cache_destroy(br_fdb_cache);
}
+int br_fdb_hash_init(struct net_bridge *br)
+{
+ return rhashtable_init(&br->fdb_hash_tbl, &br_fdb_rht_params);
+}
+
+void br_fdb_hash_fini(struct net_bridge *br)
+{
+ rhashtable_destroy(&br->fdb_hash_tbl);
+}
/* if topology_changing then use forward_delay (default 15 sec)
* otherwise keep longer (default 5 minutes)
@@ -70,13 +84,6 @@ static inline int has_expired(const struct net_bridge *br,
time_before_eq(fdb->updated + hold_time(br), jiffies);
}
-static inline int br_mac_hash(const unsigned char *mac, __u16 vid)
-{
- /* use 1 byte of OUI and 3 bytes of NIC */
- u32 key = get_unaligned((u32 *)(mac + 2));
- return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1);
-}
-
static void fdb_rcu_free(struct rcu_head *head)
{
struct net_bridge_fdb_entry *ent
@@ -84,19 +91,18 @@ static void fdb_rcu_free(struct rcu_head *head)
kmem_cache_free(br_fdb_cache, ent);
}
-static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
+static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl,
const unsigned char *addr,
__u16 vid)
{
- struct net_bridge_fdb_entry *f;
+ struct net_bridge_fdb_key key;
WARN_ON_ONCE(!rcu_read_lock_held());
- hlist_for_each_entry_rcu(f, head, hlist)
- if (ether_addr_equal(f->addr.addr, addr) && f->vlan_id == vid)
- break;
+ key.vlan_id = vid;
+ memcpy(key.addr.addr, addr, sizeof(key.addr.addr));
- return f;
+ return rhashtable_lookup(tbl, &key, br_fdb_rht_params);
}
/* requires bridge hash_lock */
@@ -104,13 +110,12 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br,
const unsigned char *addr,
__u16 vid)
{
- struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
lockdep_assert_held_once(&br->hash_lock);
rcu_read_lock();
- fdb = fdb_find_rcu(head, addr, vid);
+ fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
rcu_read_unlock();
return fdb;
@@ -120,9 +125,7 @@ struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br,
const unsigned char *addr,
__u16 vid)
{
- struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
-
- return fdb_find_rcu(head, addr, vid);
+ return fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
}
/* When a static FDB entry is added, the mac address from the entry is
@@ -175,9 +178,11 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
trace_fdb_delete(br, f);
if (f->is_static)
- fdb_del_hw_addr(br, f->addr.addr);
+ fdb_del_hw_addr(br, f->key.addr.addr);
- hlist_del_init_rcu(&f->hlist);
+ hlist_del_init_rcu(&f->fdb_node);
+ rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode,
+ br_fdb_rht_params);
fdb_notify(br, f, RTM_DELNEIGH);
call_rcu(&f->rcu, fdb_rcu_free);
}
@@ -187,11 +192,11 @@ static void fdb_delete_local(struct net_bridge *br,
const struct net_bridge_port *p,
struct net_bridge_fdb_entry *f)
{
- const unsigned char *addr = f->addr.addr;
+ const unsigned char *addr = f->key.addr.addr;
struct net_bridge_vlan_group *vg;
const struct net_bridge_vlan *v;
struct net_bridge_port *op;
- u16 vid = f->vlan_id;
+ u16 vid = f->key.vlan_id;
/* Maybe another port has same hw addr? */
list_for_each_entry(op, &br->port_list, list) {
@@ -233,31 +238,23 @@ void br_fdb_find_delete_local(struct net_bridge *br,
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
{
struct net_bridge_vlan_group *vg;
+ struct net_bridge_fdb_entry *f;
struct net_bridge *br = p->br;
struct net_bridge_vlan *v;
- int i;
spin_lock_bh(&br->hash_lock);
-
vg = nbp_vlan_group(p);
- /* Search all chains since old address/hash is unknown */
- for (i = 0; i < BR_HASH_SIZE; i++) {
- struct hlist_node *h;
- hlist_for_each(h, &br->hash[i]) {
- struct net_bridge_fdb_entry *f;
-
- f = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
- if (f->dst == p && f->is_local && !f->added_by_user) {
- /* delete old one */
- fdb_delete_local(br, p, f);
-
- /* if this port has no vlan information
- * configured, we can safely be done at
- * this point.
- */
- if (!vg || !vg->num_vlans)
- goto insert;
- }
+ hlist_for_each_entry(f, &br->fdb_list, fdb_node) {
+ if (f->dst == p && f->is_local && !f->added_by_user) {
+ /* delete old one */
+ fdb_delete_local(br, p, f);
+
+ /* if this port has no vlan information
+ * configured, we can safely be done at
+ * this point.
+ */
+ if (!vg || !vg->num_vlans)
+ goto insert;
}
}
@@ -316,35 +313,32 @@ void br_fdb_cleanup(struct work_struct *work)
{
struct net_bridge *br = container_of(work, struct net_bridge,
gc_work.work);
+ struct net_bridge_fdb_entry *f = NULL;
unsigned long delay = hold_time(br);
unsigned long work_delay = delay;
unsigned long now = jiffies;
- int i;
- for (i = 0; i < BR_HASH_SIZE; i++) {
- struct net_bridge_fdb_entry *f;
- struct hlist_node *n;
+ /* this part is tricky, in order to avoid blocking learning and
+ * consequently forwarding, we rely on rcu to delete objects with
+ * delayed freeing allowing us to continue traversing
+ */
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ unsigned long this_timer;
- if (!br->hash[i].first)
+ if (f->is_static || f->added_by_external_learn)
continue;
-
- spin_lock_bh(&br->hash_lock);
- hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
- unsigned long this_timer;
-
- if (f->is_static)
- continue;
- if (f->added_by_external_learn)
- continue;
- this_timer = f->updated + delay;
- if (time_after(this_timer, now))
- work_delay = min(work_delay, this_timer - now);
- else
+ this_timer = f->updated + delay;
+ if (time_after(this_timer, now)) {
+ work_delay = min(work_delay, this_timer - now);
+ } else {
+ spin_lock_bh(&br->hash_lock);
+ if (!hlist_unhashed(&f->fdb_node))
fdb_delete(br, f);
+ spin_unlock_bh(&br->hash_lock);
}
- spin_unlock_bh(&br->hash_lock);
- cond_resched();
}
+ rcu_read_unlock();
/* Cleanup minimum 10 milliseconds apart */
work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10));
@@ -354,16 +348,13 @@ void br_fdb_cleanup(struct work_struct *work)
/* Completely flush all dynamic entries in forwarding database.*/
void br_fdb_flush(struct net_bridge *br)
{
- int i;
+ struct net_bridge_fdb_entry *f;
+ struct hlist_node *tmp;
spin_lock_bh(&br->hash_lock);
- for (i = 0; i < BR_HASH_SIZE; i++) {
- struct net_bridge_fdb_entry *f;
- struct hlist_node *n;
- hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
- if (!f->is_static)
- fdb_delete(br, f);
- }
+ hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
+ if (!f->is_static)
+ fdb_delete(br, f);
}
spin_unlock_bh(&br->hash_lock);
}
@@ -377,27 +368,22 @@ void br_fdb_delete_by_port(struct net_bridge *br,
u16 vid,
int do_all)
{
- int i;
+ struct net_bridge_fdb_entry *f;
+ struct hlist_node *tmp;
spin_lock_bh(&br->hash_lock);
- for (i = 0; i < BR_HASH_SIZE; i++) {
- struct hlist_node *h, *g;
+ hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
+ if (f->dst != p)
+ continue;
- hlist_for_each_safe(h, g, &br->hash[i]) {
- struct net_bridge_fdb_entry *f
- = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
- if (f->dst != p)
+ if (!do_all)
+ if (f->is_static || (vid && f->key.vlan_id != vid))
continue;
- if (!do_all)
- if (f->is_static || (vid && f->vlan_id != vid))
- continue;
-
- if (f->is_local)
- fdb_delete_local(br, p, f);
- else
- fdb_delete(br, f);
- }
+ if (f->is_local)
+ fdb_delete_local(br, p, f);
+ else
+ fdb_delete(br, f);
}
spin_unlock_bh(&br->hash_lock);
}
@@ -433,52 +419,48 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
int br_fdb_fillbuf(struct net_bridge *br, void *buf,
unsigned long maxnum, unsigned long skip)
{
- struct __fdb_entry *fe = buf;
- int i, num = 0;
struct net_bridge_fdb_entry *f;
+ struct __fdb_entry *fe = buf;
+ int num = 0;
memset(buf, 0, maxnum*sizeof(struct __fdb_entry));
rcu_read_lock();
- for (i = 0; i < BR_HASH_SIZE; i++) {
- hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
- if (num >= maxnum)
- goto out;
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ if (num >= maxnum)
+ break;
- if (has_expired(br, f))
- continue;
+ if (has_expired(br, f))
+ continue;
- /* ignore pseudo entry for local MAC address */
- if (!f->dst)
- continue;
+ /* ignore pseudo entry for local MAC address */
+ if (!f->dst)
+ continue;
- if (skip) {
- --skip;
- continue;
- }
+ if (skip) {
+ --skip;
+ continue;
+ }
- /* convert from internal format to API */
- memcpy(fe->mac_addr, f->addr.addr, ETH_ALEN);
+ /* convert from internal format to API */
+ memcpy(fe->mac_addr, f->key.addr.addr, ETH_ALEN);
- /* due to ABI compat need to split into hi/lo */
- fe->port_no = f->dst->port_no;
- fe->port_hi = f->dst->port_no >> 8;
+ /* due to ABI compat need to split into hi/lo */
+ fe->port_no = f->dst->port_no;
+ fe->port_hi = f->dst->port_no >> 8;
- fe->is_local = f->is_local;
- if (!f->is_static)
- fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
- ++fe;
- ++num;
- }
+ fe->is_local = f->is_local;
+ if (!f->is_static)
+ fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
+ ++fe;
+ ++num;
}
-
- out:
rcu_read_unlock();
return num;
}
-static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
+static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
struct net_bridge_port *source,
const unsigned char *addr,
__u16 vid,
@@ -489,16 +471,23 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
if (fdb) {
- memcpy(fdb->addr.addr, addr, ETH_ALEN);
+ memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
fdb->dst = source;
- fdb->vlan_id = vid;
+ fdb->key.vlan_id = vid;
fdb->is_local = is_local;
fdb->is_static = is_static;
fdb->added_by_user = 0;
fdb->added_by_external_learn = 0;
fdb->offloaded = 0;
fdb->updated = fdb->used = jiffies;
- hlist_add_head_rcu(&fdb->hlist, head);
+ if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl,
+ &fdb->rhnode,
+ br_fdb_rht_params)) {
+ kmem_cache_free(br_fdb_cache, fdb);
+ fdb = NULL;
+ } else {
+ hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
+ }
}
return fdb;
}
@@ -506,7 +495,6 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid)
{
- struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
if (!is_valid_ether_addr(addr))
@@ -524,7 +512,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
fdb_delete(br, fdb);
}
- fdb = fdb_create(head, source, addr, vid, 1, 1);
+ fdb = fdb_create(br, source, addr, vid, 1, 1);
if (!fdb)
return -ENOMEM;
@@ -548,7 +536,6 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid, bool added_by_user)
{
- struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
bool fdb_modified = false;
@@ -561,7 +548,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
source->state == BR_STATE_FORWARDING))
return;
- fdb = fdb_find_rcu(head, addr, vid);
+ fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
if (likely(fdb)) {
/* attempt to update an entry for a local interface */
if (unlikely(fdb->is_local)) {
@@ -590,14 +577,13 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
}
} else {
spin_lock(&br->hash_lock);
- if (likely(!fdb_find_rcu(head, addr, vid))) {
- fdb = fdb_create(head, source, addr, vid, 0, 0);
- if (fdb) {
- if (unlikely(added_by_user))
- fdb->added_by_user = 1;
- trace_br_fdb_update(br, source, addr, vid, added_by_user);
- fdb_notify(br, fdb, RTM_NEWNEIGH);
- }
+ fdb = fdb_create(br, source, addr, vid, 0, 0);
+ if (fdb) {
+ if (unlikely(added_by_user))
+ fdb->added_by_user = 1;
+ trace_br_fdb_update(br, source, addr, vid,
+ added_by_user);
+ fdb_notify(br, fdb, RTM_NEWNEIGH);
}
/* else we lose race and someone else inserts
* it first, don't bother updating
@@ -646,7 +632,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
if (fdb->added_by_external_learn)
ndm->ndm_flags |= NTF_EXT_LEARNED;
- if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr))
+ if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
goto nla_put_failure;
if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
goto nla_put_failure;
@@ -657,7 +643,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
goto nla_put_failure;
- if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id))
+ if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16),
+ &fdb->key.vlan_id))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -711,54 +698,48 @@ int br_fdb_dump(struct sk_buff *skb,
int *idx)
{
struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_fdb_entry *f;
int err = 0;
- int i;
if (!(dev->priv_flags & IFF_EBRIDGE))
- goto out;
+ return err;
if (!filter_dev) {
err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
if (err < 0)
- goto out;
+ return err;
}
- for (i = 0; i < BR_HASH_SIZE; i++) {
- struct net_bridge_fdb_entry *f;
-
- hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
-
- if (*idx < cb->args[2])
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ if (*idx < cb->args[2])
+ goto skip;
+ if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) {
+ if (filter_dev != dev)
goto skip;
-
- if (filter_dev &&
- (!f->dst || f->dst->dev != filter_dev)) {
- if (filter_dev != dev)
- goto skip;
- /* !f->dst is a special case for bridge
- * It means the MAC belongs to the bridge
- * Therefore need a little more filtering
- * we only want to dump the !f->dst case
- */
- if (f->dst)
- goto skip;
- }
- if (!filter_dev && f->dst)
+ /* !f->dst is a special case for bridge
+ * It means the MAC belongs to the bridge
+ * Therefore need a little more filtering
+ * we only want to dump the !f->dst case
+ */
+ if (f->dst)
goto skip;
-
- err = fdb_fill_info(skb, br, f,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH,
- NLM_F_MULTI);
- if (err < 0)
- goto out;
-skip:
- *idx += 1;
}
+ if (!filter_dev && f->dst)
+ goto skip;
+
+ err = fdb_fill_info(skb, br, f,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH,
+ NLM_F_MULTI);
+ if (err < 0)
+ break;
+skip:
+ *idx += 1;
}
+ rcu_read_unlock();
-out:
return err;
}
@@ -766,7 +747,6 @@ out:
static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
const __u8 *addr, __u16 state, __u16 flags, __u16 vid)
{
- struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
bool modified = false;
@@ -787,7 +767,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (!(flags & NLM_F_CREATE))
return -ENOENT;
- fdb = fdb_create(head, source, addr, vid, 0, 0);
+ fdb = fdb_create(br, source, addr, vid, 0, 0);
if (!fdb)
return -ENOMEM;
@@ -1012,65 +992,60 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p)
{
- struct net_bridge_fdb_entry *fdb, *tmp;
- int i;
- int err;
+ struct net_bridge_fdb_entry *f, *tmp;
+ int err = 0;
ASSERT_RTNL();
- for (i = 0; i < BR_HASH_SIZE; i++) {
- hlist_for_each_entry(fdb, &br->hash[i], hlist) {
- /* We only care for static entries */
- if (!fdb->is_static)
- continue;
-
- err = dev_uc_add(p->dev, fdb->addr.addr);
- if (err)
- goto rollback;
- }
+ /* the key here is that static entries change only under rtnl */
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ /* We only care for static entries */
+ if (!f->is_static)
+ continue;
+ err = dev_uc_add(p->dev, f->key.addr.addr);
+ if (err)
+ goto rollback;
}
- return 0;
+done:
+ rcu_read_unlock();
-rollback:
- for (i = 0; i < BR_HASH_SIZE; i++) {
- hlist_for_each_entry(tmp, &br->hash[i], hlist) {
- /* If we reached the fdb that failed, we can stop */
- if (tmp == fdb)
- break;
-
- /* We only care for static entries */
- if (!tmp->is_static)
- continue;
+ return err;
- dev_uc_del(p->dev, tmp->addr.addr);
- }
+rollback:
+ hlist_for_each_entry_rcu(tmp, &br->fdb_list, fdb_node) {
+ /* We only care for static entries */
+ if (!tmp->is_static)
+ continue;
+ if (tmp == f)
+ break;
+ dev_uc_del(p->dev, tmp->key.addr.addr);
}
- return err;
+
+ goto done;
}
void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
{
- struct net_bridge_fdb_entry *fdb;
- int i;
+ struct net_bridge_fdb_entry *f;
ASSERT_RTNL();
- for (i = 0; i < BR_HASH_SIZE; i++) {
- hlist_for_each_entry_rcu(fdb, &br->hash[i], hlist) {
- /* We only care for static entries */
- if (!fdb->is_static)
- continue;
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ /* We only care for static entries */
+ if (!f->is_static)
+ continue;
- dev_uc_del(p->dev, fdb->addr.addr);
- }
+ dev_uc_del(p->dev, f->key.addr.addr);
}
+ rcu_read_unlock();
}
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid)
{
struct net_bridge_fdb_entry *fdb;
- struct hlist_head *head;
bool modified = false;
int err = 0;
@@ -1078,10 +1053,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
spin_lock_bh(&br->hash_lock);
- head = &br->hash[br_mac_hash(addr, vid)];
fdb = br_fdb_find(br, addr, vid);
if (!fdb) {
- fdb = fdb_create(head, p, addr, vid, 0, 0);
+ fdb = fdb_create(br, p, addr, vid, 0, 0);
if (!fdb) {
err = -ENOMEM;
goto err_unlock;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index b0f4c734900b..6d9f48bd374a 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -760,9 +760,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
void br_mdb_init(void)
{
- rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0);
- rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0);
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0);
}
void br_mdb_uninit(void)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index c2eea1b8737a..27f1d4f2114a 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -991,7 +991,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
unsigned int i;
int ret;
- e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
+ e = rcu_dereference(net->nf.hooks_bridge[hook]);
if (!e)
return okfn(net, sk, skb);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d0ef0a8e8831..015f465c514b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
struct net_bridge *br = netdev_priv(dev);
int err;
+ err = register_netdevice(dev);
+ if (err)
+ return err;
+
if (tb[IFLA_ADDRESS]) {
spin_lock_bh(&br->lock);
br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
spin_unlock_bh(&br->lock);
}
- err = register_netdevice(dev);
- if (err)
- return err;
-
err = br_changelink(dev, tb, data, extack);
if (err)
- unregister_netdevice(dev);
+ br_dev_delete(dev, NULL);
+
return err;
}
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index 20cbb727df4d..8e2d7cfa4e16 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -78,7 +78,6 @@ void br_netfilter_rtable_init(struct net_bridge *br)
atomic_set(&rt->dst.__refcnt, 1);
rt->dst.dev = br->dev;
- rt->dst.path = &rt->dst;
dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
rt->dst.ops = &fake_dst_ops;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1312b8d20ec3..8e13a64d8c99 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -168,12 +168,17 @@ struct net_bridge_vlan_group {
u16 pvid;
};
+struct net_bridge_fdb_key {
+ mac_addr addr;
+ u16 vlan_id;
+};
+
struct net_bridge_fdb_entry {
- struct hlist_node hlist;
+ struct rhash_head rhnode;
struct net_bridge_port *dst;
- mac_addr addr;
- __u16 vlan_id;
+ struct net_bridge_fdb_key key;
+ struct hlist_node fdb_node;
unsigned char is_local:1,
is_static:1,
added_by_user:1,
@@ -315,7 +320,7 @@ struct net_bridge {
struct net_bridge_vlan_group __rcu *vlgrp;
#endif
- struct hlist_head hash[BR_HASH_SIZE];
+ struct rhashtable fdb_hash_tbl;
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
union {
struct rtable fake_rtable;
@@ -405,6 +410,7 @@ struct net_bridge {
int offload_fwd_mark;
#endif
bool neigh_suppress_enabled;
+ struct hlist_head fdb_list;
};
struct br_input_skb_cb {
@@ -515,6 +521,8 @@ static inline void br_netpoll_disable(struct net_bridge_port *p)
/* br_fdb.c */
int br_fdb_init(void);
void br_fdb_fini(void);
+int br_fdb_hash_init(struct net_bridge *br);
+void br_fdb_hash_fini(struct net_bridge *br);
void br_fdb_flush(struct net_bridge *br);
void br_fdb_find_delete_local(struct net_bridge *br,
const struct net_bridge_port *p,
@@ -752,7 +760,7 @@ static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
static inline bool br_multicast_is_router(struct net_bridge *br)
{
- return 0;
+ return false;
}
static inline bool br_multicast_querier_exists(struct net_bridge *br,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 9700e0f3307b..ee775f4ff76c 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -121,13 +121,13 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
switch (type) {
case RTM_DELNEIGH:
- br_switchdev_fdb_call_notifiers(false, fdb->addr.addr,
- fdb->vlan_id,
+ br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
+ fdb->key.vlan_id,
fdb->dst->dev);
break;
case RTM_NEWNEIGH:
- br_switchdev_fdb_call_notifiers(true, fdb->addr.addr,
- fdb->vlan_id,
+ br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
+ fdb->key.vlan_id,
fdb->dst->dev);
break;
}
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 723f25eed8ea..b1be0dcfba6b 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -272,10 +272,7 @@ static ssize_t group_addr_show(struct device *d,
struct device_attribute *attr, char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%x:%x:%x:%x:%x:%x\n",
- br->group_addr[0], br->group_addr[1],
- br->group_addr[2], br->group_addr[3],
- br->group_addr[4], br->group_addr[5]);
+ return sprintf(buf, "%pM\n", br->group_addr);
}
static ssize_t group_addr_store(struct device *d,
@@ -284,14 +281,11 @@ static ssize_t group_addr_store(struct device *d,
{
struct net_bridge *br = to_bridge(d);
u8 new_addr[6];
- int i;
if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
- if (sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
- &new_addr[0], &new_addr[1], &new_addr[2],
- &new_addr[3], &new_addr[4], &new_addr[5]) != 6)
+ if (!mac_pton(buf, new_addr))
return -EINVAL;
if (!is_link_local_ether_addr(new_addr))
@@ -306,8 +300,7 @@ static ssize_t group_addr_store(struct device *d,
return restart_syscall();
spin_lock_bh(&br->lock);
- for (i = 0; i < 6; i++)
- br->group_addr[i] = new_addr[i];
+ ether_addr_copy(br->group_addr, new_addr);
spin_unlock_bh(&br->lock);
br->group_addr_set = true;
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index e7ef1a1ef3a6..225d1668dfdd 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -4,6 +4,7 @@
#
menuconfig NF_TABLES_BRIDGE
depends on BRIDGE && NETFILTER && NF_TABLES
+ select NETFILTER_FAMILY_BRIDGE
tristate "Ethernet Bridge nf_tables support"
if NF_TABLES_BRIDGE
@@ -29,6 +30,7 @@ endif # NF_TABLES_BRIDGE
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
+ select NETFILTER_FAMILY_BRIDGE
help
ebtables is a general, extensible frame/packet identification
framework. Say 'Y' or 'M' here if you want to do Ethernet
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 37817d25b63d..02c4b409d317 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2445,7 +2445,6 @@ static int __init ebtables_init(void)
return ret;
}
- printk(KERN_INFO "Ebtables v2.0 registered\n");
return 0;
}
@@ -2453,7 +2452,6 @@ static void __exit ebtables_fini(void)
{
nf_unregister_sockopt(&ebt_sockopts);
xt_unregister_target(&ebt_standard_target);
- printk(KERN_INFO "Ebtables v2.0 unregistered\n");
}
EXPORT_SYMBOL(ebt_register_table);
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index 97afdc0744e6..5160cf614176 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -25,63 +25,23 @@ nft_do_chain_bridge(void *priv,
{
struct nft_pktinfo pkt;
+ nft_set_pktinfo(&pkt, skb, state);
+
switch (eth_hdr(skb)->h_proto) {
case htons(ETH_P_IP):
- nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
+ nft_set_pktinfo_ipv4_validate(&pkt, skb);
break;
case htons(ETH_P_IPV6):
- nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
+ nft_set_pktinfo_ipv6_validate(&pkt, skb);
break;
default:
- nft_set_pktinfo_unspec(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb);
break;
}
return nft_do_chain(&pkt, priv);
}
-static struct nft_af_info nft_af_bridge __read_mostly = {
- .family = NFPROTO_BRIDGE,
- .nhooks = NF_BR_NUMHOOKS,
- .owner = THIS_MODULE,
- .nops = 1,
- .hooks = {
- [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
- [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
- [NF_BR_FORWARD] = nft_do_chain_bridge,
- [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
- [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
- },
-};
-
-static int nf_tables_bridge_init_net(struct net *net)
-{
- net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.bridge == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge));
-
- if (nft_register_afinfo(net, net->nft.bridge) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.bridge);
- return -ENOMEM;
-}
-
-static void nf_tables_bridge_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.bridge);
- kfree(net->nft.bridge);
-}
-
-static struct pernet_operations nf_tables_bridge_net_ops = {
- .init = nf_tables_bridge_init_net,
- .exit = nf_tables_bridge_exit_net,
-};
-
static const struct nf_chain_type filter_bridge = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -92,75 +52,23 @@ static const struct nf_chain_type filter_bridge = {
(1 << NF_BR_FORWARD) |
(1 << NF_BR_LOCAL_OUT) |
(1 << NF_BR_POST_ROUTING),
-};
-
-static void nf_br_saveroute(const struct sk_buff *skb,
- struct nf_queue_entry *entry)
-{
-}
-
-static int nf_br_reroute(struct net *net, struct sk_buff *skb,
- const struct nf_queue_entry *entry)
-{
- return 0;
-}
-
-static __sum16 nf_br_checksum(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, u_int8_t protocol)
-{
- return 0;
-}
-
-static __sum16 nf_br_checksum_partial(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, unsigned int len,
- u_int8_t protocol)
-{
- return 0;
-}
-
-static int nf_br_route(struct net *net, struct dst_entry **dst,
- struct flowi *fl, bool strict __always_unused)
-{
- return 0;
-}
-
-static const struct nf_afinfo nf_br_afinfo = {
- .family = AF_BRIDGE,
- .checksum = nf_br_checksum,
- .checksum_partial = nf_br_checksum_partial,
- .route = nf_br_route,
- .saveroute = nf_br_saveroute,
- .reroute = nf_br_reroute,
- .route_key_size = 0,
+ .hooks = {
+ [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
+ [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
+ [NF_BR_FORWARD] = nft_do_chain_bridge,
+ [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
+ [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
+ },
};
static int __init nf_tables_bridge_init(void)
{
- int ret;
-
- nf_register_afinfo(&nf_br_afinfo);
- ret = nft_register_chain_type(&filter_bridge);
- if (ret < 0)
- goto err1;
-
- ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
- if (ret < 0)
- goto err2;
-
- return ret;
-
-err2:
- nft_unregister_chain_type(&filter_bridge);
-err1:
- nf_unregister_afinfo(&nf_br_afinfo);
- return ret;
+ return nft_register_chain_type(&filter_bridge);
}
static void __exit nf_tables_bridge_exit(void)
{
- unregister_pernet_subsys(&nf_tables_bridge_net_ops);
nft_unregister_chain_type(&filter_bridge);
- nf_unregister_afinfo(&nf_br_afinfo);
}
module_init(nf_tables_bridge_init);
@@ -168,4 +76,4 @@ module_exit(nf_tables_bridge_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE);
+MODULE_ALIAS_NFT_CHAIN(AF_BRIDGE, "filter");
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 2d38b6e34203..e0adcd123f48 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -334,9 +334,8 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
mutex_lock(&caifdevs->lock);
list_add_rcu(&caifd->list, &caifdevs->list);
- strncpy(caifd->layer.name, dev->name,
- sizeof(caifd->layer.name) - 1);
- caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
+ strlcpy(caifd->layer.name, dev->name,
+ sizeof(caifd->layer.name));
caifd->layer.transmit = transmit;
cfcnfg_add_phy_layer(cfg,
dev,
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 632d5a416d97..64048cec41e0 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -934,11 +934,11 @@ static int caif_release(struct socket *sock)
}
/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
-static unsigned int caif_poll(struct file *file,
+static __poll_t caif_poll(struct file *file,
struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask;
+ __poll_t mask;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
sock_poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 5cd44f001f64..1a082a946045 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -176,9 +176,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
dev_add_pack(&caif_usb_type);
pack_added = true;
- strncpy(layer->name, dev->name,
- sizeof(layer->name) - 1);
- layer->name[sizeof(layer->name) - 1] = 0;
+ strlcpy(layer->name, dev->name, sizeof(layer->name));
return 0;
}
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 273cb07f57d8..8f00bea093b9 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -268,17 +268,15 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
case CAIFPROTO_RFM:
l->linktype = CFCTRL_SRV_RFM;
l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
- strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
- sizeof(l->u.rfm.volume)-1);
- l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
+ strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
+ sizeof(l->u.rfm.volume));
break;
case CAIFPROTO_UTIL:
l->linktype = CFCTRL_SRV_UTIL;
l->endpoint = 0x00;
l->chtype = 0x00;
- strncpy(l->u.utility.name, s->sockaddr.u.util.service,
- sizeof(l->u.utility.name)-1);
- l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
+ strlcpy(l->u.utility.name, s->sockaddr.u.util.service,
+ sizeof(l->u.utility.name));
caif_assert(sizeof(l->u.utility.name) > 10);
l->u.utility.paramlen = s->param.size;
if (l->u.utility.paramlen > sizeof(l->u.utility.params))
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index f5afda1abc76..a1e85f032108 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -258,8 +258,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
cfpkt_add_body(pkt, &tmp16, 2);
memset(utility_name, 0, sizeof(utility_name));
- strncpy(utility_name, param->u.utility.name,
- UTILITY_NAME_LENGTH - 1);
+ strlcpy(utility_name, param->u.utility.name,
+ UTILITY_NAME_LENGTH);
cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
tmp8 = param->u.utility.paramlen;
cfpkt_add_body(pkt, &tmp8, 1);
@@ -352,15 +352,14 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
u8 cmdrsp;
u8 cmd;
int ret = -1;
- u16 tmp16;
u8 len;
u8 param[255];
- u8 linkid;
+ u8 linkid = 0;
struct cfctrl *cfctrl = container_obj(layer);
struct cfctrl_request_info rsp, *req;
- cfpkt_extr_head(pkt, &cmdrsp, 1);
+ cmdrsp = cfpkt_extr_head_u8(pkt);
cmd = cmdrsp & CFCTRL_CMD_MASK;
if (cmd != CFCTRL_CMD_LINK_ERR
&& CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)
@@ -378,13 +377,12 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
u8 physlinkid;
u8 prio;
u8 tmp;
- u32 tmp32;
u8 *cp;
int i;
struct cfctrl_link_param linkparam;
memset(&linkparam, 0, sizeof(linkparam));
- cfpkt_extr_head(pkt, &tmp, 1);
+ tmp = cfpkt_extr_head_u8(pkt);
serv = tmp & CFCTRL_SRV_MASK;
linkparam.linktype = serv;
@@ -392,13 +390,13 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
servtype = tmp >> 4;
linkparam.chtype = servtype;
- cfpkt_extr_head(pkt, &tmp, 1);
+ tmp = cfpkt_extr_head_u8(pkt);
physlinkid = tmp & 0x07;
prio = tmp >> 3;
linkparam.priority = prio;
linkparam.phyid = physlinkid;
- cfpkt_extr_head(pkt, &endpoint, 1);
+ endpoint = cfpkt_extr_head_u8(pkt);
linkparam.endpoint = endpoint & 0x03;
switch (serv) {
@@ -407,45 +405,43 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
if (CFCTRL_ERR_BIT & cmdrsp)
break;
/* Link ID */
- cfpkt_extr_head(pkt, &linkid, 1);
+ linkid = cfpkt_extr_head_u8(pkt);
break;
case CFCTRL_SRV_VIDEO:
- cfpkt_extr_head(pkt, &tmp, 1);
+ tmp = cfpkt_extr_head_u8(pkt);
linkparam.u.video.connid = tmp;
if (CFCTRL_ERR_BIT & cmdrsp)
break;
/* Link ID */
- cfpkt_extr_head(pkt, &linkid, 1);
+ linkid = cfpkt_extr_head_u8(pkt);
break;
case CFCTRL_SRV_DATAGRAM:
- cfpkt_extr_head(pkt, &tmp32, 4);
linkparam.u.datagram.connid =
- le32_to_cpu(tmp32);
+ cfpkt_extr_head_u32(pkt);
if (CFCTRL_ERR_BIT & cmdrsp)
break;
/* Link ID */
- cfpkt_extr_head(pkt, &linkid, 1);
+ linkid = cfpkt_extr_head_u8(pkt);
break;
case CFCTRL_SRV_RFM:
/* Construct a frame, convert
* DatagramConnectionID
* to network format long and copy it out...
*/
- cfpkt_extr_head(pkt, &tmp32, 4);
linkparam.u.rfm.connid =
- le32_to_cpu(tmp32);
+ cfpkt_extr_head_u32(pkt);
cp = (u8 *) linkparam.u.rfm.volume;
- for (cfpkt_extr_head(pkt, &tmp, 1);
+ for (tmp = cfpkt_extr_head_u8(pkt);
cfpkt_more(pkt) && tmp != '\0';
- cfpkt_extr_head(pkt, &tmp, 1))
+ tmp = cfpkt_extr_head_u8(pkt))
*cp++ = tmp;
*cp = '\0';
if (CFCTRL_ERR_BIT & cmdrsp)
break;
/* Link ID */
- cfpkt_extr_head(pkt, &linkid, 1);
+ linkid = cfpkt_extr_head_u8(pkt);
break;
case CFCTRL_SRV_UTIL:
@@ -454,13 +450,11 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
* to network format long and copy it out...
*/
/* Fifosize KB */
- cfpkt_extr_head(pkt, &tmp16, 2);
linkparam.u.utility.fifosize_kb =
- le16_to_cpu(tmp16);
+ cfpkt_extr_head_u16(pkt);
/* Fifosize bufs */
- cfpkt_extr_head(pkt, &tmp16, 2);
linkparam.u.utility.fifosize_bufs =
- le16_to_cpu(tmp16);
+ cfpkt_extr_head_u16(pkt);
/* name */
cp = (u8 *) linkparam.u.utility.name;
caif_assert(sizeof(linkparam.u.utility.name)
@@ -468,24 +462,24 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
for (i = 0;
i < UTILITY_NAME_LENGTH
&& cfpkt_more(pkt); i++) {
- cfpkt_extr_head(pkt, &tmp, 1);
+ tmp = cfpkt_extr_head_u8(pkt);
*cp++ = tmp;
}
/* Length */
- cfpkt_extr_head(pkt, &len, 1);
+ len = cfpkt_extr_head_u8(pkt);
linkparam.u.utility.paramlen = len;
/* Param Data */
cp = linkparam.u.utility.params;
while (cfpkt_more(pkt) && len--) {
- cfpkt_extr_head(pkt, &tmp, 1);
+ tmp = cfpkt_extr_head_u8(pkt);
*cp++ = tmp;
}
if (CFCTRL_ERR_BIT & cmdrsp)
break;
/* Link ID */
- cfpkt_extr_head(pkt, &linkid, 1);
+ linkid = cfpkt_extr_head_u8(pkt);
/* Length */
- cfpkt_extr_head(pkt, &len, 1);
+ len = cfpkt_extr_head_u8(pkt);
/* Param Data */
cfpkt_extr_head(pkt, &param, len);
break;
@@ -522,7 +516,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
}
break;
case CFCTRL_CMD_LINK_DESTROY:
- cfpkt_extr_head(pkt, &linkid, 1);
+ linkid = cfpkt_extr_head_u8(pkt);
cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid);
break;
case CFCTRL_CMD_LINK_ERR:
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 71b6ab240dea..38c2b7a890dd 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -8,7 +8,6 @@
#include <linux/string.h>
#include <linux/skbuff.h>
-#include <linux/hardirq.h>
#include <linux/export.h>
#include <net/caif/cfpkt.h>
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 922ac1d605b3..53ecda10b790 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -8,7 +8,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
#include <linux/fs.h>
-#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netdevice.h>
diff --git a/net/can/Kconfig b/net/can/Kconfig
index a15c0e0d1fc7..a4399be54ff4 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -11,7 +11,7 @@ menuconfig CAN
1991, mainly for automotive, but now widely used in marine
(NMEA2000), industrial, and medical applications.
More information on the CAN network protocol family PF_CAN
- is contained in <Documentation/networking/can.txt>.
+ is contained in <Documentation/networking/can.rst>.
If you want CAN support you should say Y here and also to the
specific driver for your controller(s) below.
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 003b2d6d655f..6da324550eec 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -321,13 +321,13 @@ EXPORT_SYMBOL(can_send);
* af_can rx path
*/
-static struct dev_rcv_lists *find_dev_rcv_lists(struct net *net,
+static struct can_dev_rcv_lists *find_dev_rcv_lists(struct net *net,
struct net_device *dev)
{
if (!dev)
return net->can.can_rx_alldev_list;
else
- return (struct dev_rcv_lists *)dev->ml_priv;
+ return (struct can_dev_rcv_lists *)dev->ml_priv;
}
/**
@@ -381,7 +381,7 @@ static unsigned int effhash(canid_t can_id)
* Reduced can_id to have a preprocessed filter compare value.
*/
static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
- struct dev_rcv_lists *d)
+ struct can_dev_rcv_lists *d)
{
canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */
@@ -464,7 +464,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
{
struct receiver *r;
struct hlist_head *rl;
- struct dev_rcv_lists *d;
+ struct can_dev_rcv_lists *d;
struct s_pstats *can_pstats = net->can.can_pstats;
int err = 0;
@@ -542,7 +542,7 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
struct receiver *r = NULL;
struct hlist_head *rl;
struct s_pstats *can_pstats = net->can.can_pstats;
- struct dev_rcv_lists *d;
+ struct can_dev_rcv_lists *d;
if (dev && dev->type != ARPHRD_CAN)
return;
@@ -615,7 +615,7 @@ static inline void deliver(struct sk_buff *skb, struct receiver *r)
r->matches++;
}
-static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
+static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb)
{
struct receiver *r;
int matches = 0;
@@ -682,7 +682,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
static void can_receive(struct sk_buff *skb, struct net_device *dev)
{
- struct dev_rcv_lists *d;
+ struct can_dev_rcv_lists *d;
struct net *net = dev_net(dev);
struct s_stats *can_stats = net->can.can_stats;
int matches;
@@ -721,20 +721,16 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (WARN_ONCE(dev->type != ARPHRD_CAN ||
- skb->len != CAN_MTU ||
- cfd->len > CAN_MAX_DLEN,
- "PF_CAN: dropped non conform CAN skbuf: "
- "dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len))
- goto drop;
+ if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU ||
+ cfd->len > CAN_MAX_DLEN)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n",
+ dev->type, skb->len, cfd->len);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
can_receive(skb, dev);
return NET_RX_SUCCESS;
-
-drop:
- kfree_skb(skb);
- return NET_RX_DROP;
}
static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -742,20 +738,16 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (WARN_ONCE(dev->type != ARPHRD_CAN ||
- skb->len != CANFD_MTU ||
- cfd->len > CANFD_MAX_DLEN,
- "PF_CAN: dropped non conform CAN FD skbuf: "
- "dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len))
- goto drop;
+ if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
+ cfd->len > CANFD_MAX_DLEN)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n",
+ dev->type, skb->len, cfd->len);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
can_receive(skb, dev);
return NET_RX_SUCCESS;
-
-drop:
- kfree_skb(skb);
- return NET_RX_DROP;
}
/*
@@ -829,7 +821,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct dev_rcv_lists *d;
+ struct can_dev_rcv_lists *d;
if (dev->type != ARPHRD_CAN)
return NOTIFY_DONE;
@@ -874,7 +866,7 @@ static int can_pernet_init(struct net *net)
{
spin_lock_init(&net->can.can_rcvlists_lock);
net->can.can_rx_alldev_list =
- kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
+ kzalloc(sizeof(struct can_dev_rcv_lists), GFP_KERNEL);
if (!net->can.can_rx_alldev_list)
goto out;
net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL);
@@ -920,7 +912,7 @@ static void can_pernet_exit(struct net *net)
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv) {
- struct dev_rcv_lists *d = dev->ml_priv;
+ struct can_dev_rcv_lists *d = dev->ml_priv;
BUG_ON(d->entries);
kfree(d);
diff --git a/net/can/af_can.h b/net/can/af_can.h
index eca6463c6213..9cb3719632bd 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -67,7 +67,7 @@ struct receiver {
enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX };
/* per device receive filters linked at dev->ml_priv */
-struct dev_rcv_lists {
+struct can_dev_rcv_lists {
struct hlist_head rx[RX_MAX];
struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ];
struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ];
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 13690334efa3..ac5e5e34fee3 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -246,7 +246,6 @@ static int bcm_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations bcm_proc_fops = {
- .owner = THIS_MODULE,
.open = bcm_proc_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/can/gw.c b/net/can/gw.c
index 73a02af4b5d7..398dd0395ad9 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1014,6 +1014,8 @@ static struct pernet_operations cangw_pernet_ops = {
static __init int cgw_module_init(void)
{
+ int ret;
+
/* sanitize given module parameter */
max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS);
@@ -1031,15 +1033,19 @@ static __init int cgw_module_init(void)
notifier.notifier_call = cgw_notifier;
register_netdevice_notifier(&notifier);
- if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0)) {
+ ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE,
+ NULL, cgw_dump_jobs, 0);
+ if (ret) {
unregister_netdevice_notifier(&notifier);
kmem_cache_destroy(cgw_cache);
return -ENOBUFS;
}
- /* Only the first call to __rtnl_register can fail */
- __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, 0);
- __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, 0);
+ /* Only the first call to rtnl_register_module can fail */
+ rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
+ cgw_create_job, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
+ cgw_remove_job, NULL, 0);
return 0;
}
diff --git a/net/can/proc.c b/net/can/proc.c
index 0c59f876fe6f..fdf704e9bb8c 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -276,7 +276,6 @@ static int can_stats_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations can_stats_proc_fops = {
- .owner = THIS_MODULE,
.open = can_stats_proc_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -310,7 +309,6 @@ static int can_reset_stats_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations can_reset_stats_proc_fops = {
- .owner = THIS_MODULE,
.open = can_reset_stats_proc_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -329,7 +327,6 @@ static int can_version_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations can_version_proc_fops = {
- .owner = THIS_MODULE,
.open = can_version_proc_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -338,7 +335,7 @@ static const struct file_operations can_version_proc_fops = {
static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
struct net_device *dev,
- struct dev_rcv_lists *d)
+ struct can_dev_rcv_lists *d)
{
if (!hlist_empty(&d->rx[idx])) {
can_print_recv_banner(m);
@@ -353,7 +350,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
/* double cast to prevent GCC warning */
int idx = (int)(long)PDE_DATA(m->file->f_inode);
struct net_device *dev;
- struct dev_rcv_lists *d;
+ struct can_dev_rcv_lists *d;
struct net *net = m->private;
seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]);
@@ -382,7 +379,6 @@ static int can_rcvlist_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations can_rcvlist_proc_fops = {
- .owner = THIS_MODULE,
.open = can_rcvlist_proc_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -417,7 +413,7 @@ static inline void can_rcvlist_proc_show_array(struct seq_file *m,
static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
- struct dev_rcv_lists *d;
+ struct can_dev_rcv_lists *d;
struct net *net = m->private;
/* RX_SFF */
@@ -450,7 +446,6 @@ static int can_rcvlist_sff_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations can_rcvlist_sff_proc_fops = {
- .owner = THIS_MODULE,
.open = can_rcvlist_sff_proc_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -461,7 +456,7 @@ static const struct file_operations can_rcvlist_sff_proc_fops = {
static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
- struct dev_rcv_lists *d;
+ struct can_dev_rcv_lists *d;
struct net *net = m->private;
/* RX_EFF */
@@ -494,7 +489,6 @@ static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations can_rcvlist_eff_proc_fops = {
- .owner = THIS_MODULE,
.open = can_rcvlist_eff_proc_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/can/raw.c b/net/can/raw.c
index 864c80dbdb72..f2ecc43376a1 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -401,6 +401,8 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (len < sizeof(*addr))
return -EINVAL;
+ if (addr->can_family != AF_CAN)
+ return -EINVAL;
lock_sock(sk);
diff --git a/net/core/Makefile b/net/core/Makefile
index 1fd0a9c88b1b..6dbbba8c57ae 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
- fib_notifier.o
+ fib_notifier.o xdp.o
obj-y += net-sysfs.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 522873ed120b..b7d9293940b5 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -72,12 +72,10 @@ static inline int connection_based(struct sock *sk)
static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
void *key)
{
- unsigned long bits = (unsigned long)key;
-
/*
* Avoid a wakeup if event not interesting for us
*/
- if (bits && !(bits & (POLLIN | POLLERR)))
+ if (key && !(key_to_poll(key) & (POLLIN | POLLERR)))
return 0;
return autoremove_wake_function(wait, mode, sync, key);
}
@@ -833,11 +831,11 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
* and you use a different write policy from sock_writeable()
* then please supply your own write_space callback.
*/
-unsigned int datagram_poll(struct file *file, struct socket *sock,
+__poll_t datagram_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask;
+ __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index 07ed21d64f92..dda9d7b9a840 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1106,7 +1106,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
* when the name is long and there isn't enough space left
* for the digits, or if all bits are used.
*/
- return p ? -ENFILE : -EEXIST;
+ return -ENFILE;
}
static int dev_alloc_name_ns(struct net *net,
@@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name);
int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- return dev_alloc_name_ns(net, dev, name);
+ BUG_ON(!net);
+
+ if (!dev_valid_name(name))
+ return -EINVAL;
+
+ if (strchr(name, '%'))
+ return dev_alloc_name_ns(net, dev, name);
+ else if (__dev_get_by_name(net, name))
+ return -EEXIST;
+ else if (dev->name != name)
+ strlcpy(dev->name, name, IFNAMSIZ);
+
+ return 0;
}
EXPORT_SYMBOL(dev_get_valid_name);
@@ -1542,6 +1554,23 @@ void dev_disable_lro(struct net_device *dev)
}
EXPORT_SYMBOL(dev_disable_lro);
+/**
+ * dev_disable_gro_hw - disable HW Generic Receive Offload on a device
+ * @dev: device
+ *
+ * Disable HW Generic Receive Offload (GRO_HW) on a net device. Must be
+ * called under RTNL. This is needed if Generic XDP is installed on
+ * the device.
+ */
+static void dev_disable_gro_hw(struct net_device *dev)
+{
+ dev->wanted_features &= ~NETIF_F_GRO_HW;
+ netdev_update_features(dev);
+
+ if (unlikely(dev->features & NETIF_F_GRO_HW))
+ netdev_WARN(dev, "failed to disable GRO_HW!\n");
+}
+
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
@@ -1665,7 +1694,6 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
/**
* call_netdevice_notifiers_info - call all network notifier blocks
* @val: value passed unmodified to notifier function
- * @dev: net_device pointer passed unmodified to notifier function
* @info: notifier information data
*
* Call all network notifier blocks. Parameters and return value
@@ -2803,7 +2831,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
segs = skb_mac_gso_segment(skb, features);
- if (unlikely(skb_needs_check(skb, tx_path)))
+ if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
skb_warn_bad_offload(skb);
return segs;
@@ -3042,7 +3070,7 @@ int skb_csum_hwoffload_help(struct sk_buff *skb,
}
EXPORT_SYMBOL(skb_csum_hwoffload_help);
-static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again)
{
netdev_features_t features;
@@ -3066,9 +3094,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
__skb_linearize(skb))
goto out_kfree_skb;
- if (validate_xmit_xfrm(skb, features))
- goto out_kfree_skb;
-
/* If packet is not checksummed and device does not
* support checksumming for this protocol, complete
* checksumming here.
@@ -3085,6 +3110,8 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
}
}
+ skb = validate_xmit_xfrm(skb, features, again);
+
return skb;
out_kfree_skb:
@@ -3094,7 +3121,7 @@ out_null:
return NULL;
}
-struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again)
{
struct sk_buff *next, *head = NULL, *tail;
@@ -3105,7 +3132,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
/* in case skb wont be segmented, point to itself */
skb->prev = skb;
- skb = validate_xmit_skb(skb, dev);
+ skb = validate_xmit_skb(skb, dev, again);
if (!skb)
continue;
@@ -3139,10 +3166,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
/* + transport layer */
- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- hdr_len += tcp_hdrlen(skb);
- else
- hdr_len += sizeof(struct udphdr);
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+ const struct tcphdr *th;
+ struct tcphdr _tcphdr;
+
+ th = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_tcphdr), &_tcphdr);
+ if (likely(th))
+ hdr_len += __tcp_hdrlen(th);
+ } else {
+ struct udphdr _udphdr;
+
+ if (skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_udphdr), &_udphdr))
+ hdr_len += sizeof(struct udphdr);
+ }
if (shinfo->gso_type & SKB_GSO_DODGY)
gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
@@ -3162,6 +3200,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
int rc;
qdisc_calculate_pkt_len(skb, q);
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+ __qdisc_drop(skb, &to_free);
+ rc = NET_XMIT_DROP;
+ } else {
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ __qdisc_run(q);
+ }
+
+ if (unlikely(to_free))
+ kfree_skb_list(to_free);
+ return rc;
+ }
+
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
@@ -3192,9 +3245,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
contended = false;
}
__qdisc_run(q);
- } else
- qdisc_run_end(q);
+ }
+ qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
@@ -3204,6 +3257,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
contended = false;
}
__qdisc_run(q);
+ qdisc_run_end(q);
}
}
spin_unlock(root_lock);
@@ -3376,8 +3430,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
else
queue_index = __netdev_pick_tx(dev, skb);
- if (!accel_priv)
- queue_index = netdev_cap_txqueue(dev, queue_index);
+ queue_index = netdev_cap_txqueue(dev, queue_index);
}
skb_set_queue_mapping(skb, queue_index);
@@ -3416,6 +3469,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
struct netdev_queue *txq;
struct Qdisc *q;
int rc = -ENOMEM;
+ bool again = false;
skb_reset_mac_header(skb);
@@ -3477,7 +3531,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
XMIT_RECURSION_LIMIT))
goto recursion_alert;
- skb = validate_xmit_skb(skb, dev);
+ skb = validate_xmit_skb(skb, dev, &again);
if (!skb)
goto out;
@@ -3873,9 +3927,33 @@ drop:
return NET_RX_DROP;
}
+static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct netdev_rx_queue *rxqueue;
+
+ rxqueue = dev->_rx;
+
+ if (skb_rx_queue_recorded(skb)) {
+ u16 index = skb_get_rx_queue(skb);
+
+ if (unlikely(index >= dev->real_num_rx_queues)) {
+ WARN_ONCE(dev->real_num_rx_queues > 1,
+ "%s received packet on queue %u, but number "
+ "of RX queues is %u\n",
+ dev->name, index, dev->real_num_rx_queues);
+
+ return rxqueue; /* Return first rxqueue */
+ }
+ rxqueue += index;
+ }
+ return rxqueue;
+}
+
static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
+ struct netdev_rx_queue *rxqueue;
u32 metalen, act = XDP_DROP;
struct xdp_buff xdp;
void *orig_data;
@@ -3904,7 +3982,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
goto do_drop;
- if (troom > 0 && __skb_linearize(skb))
+ if (skb_linearize(skb))
goto do_drop;
}
@@ -3919,6 +3997,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
xdp.data_hard_start = skb->data - skb_headroom(skb);
orig_data = xdp.data;
+ rxqueue = netif_get_rxqueue(skb);
+ xdp.rxq = &rxqueue->xdp_rxq;
+
act = bpf_prog_run_xdp(xdp_prog, &xdp);
off = xdp.data - orig_data;
@@ -4143,21 +4224,26 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
while (head) {
struct Qdisc *q = head;
- spinlock_t *root_lock;
+ spinlock_t *root_lock = NULL;
head = head->next_sched;
- root_lock = qdisc_lock(q);
- spin_lock(root_lock);
+ if (!(q->flags & TCQ_F_NOLOCK)) {
+ root_lock = qdisc_lock(q);
+ spin_lock(root_lock);
+ }
/* We need to make sure head->next_sched is read
* before clearing __QDISC_STATE_SCHED
*/
smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED, &q->state);
qdisc_run(q);
- spin_unlock(root_lock);
+ if (root_lock)
+ spin_unlock(root_lock);
}
}
+
+ xfrm_dev_backlog(sd);
}
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
@@ -4545,6 +4631,7 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
} else if (new && !old) {
static_key_slow_inc(&generic_xdp_needed);
dev_disable_lro(dev);
+ dev_disable_gro_hw(dev);
}
break;
@@ -6348,6 +6435,7 @@ rollback:
* netdev_upper_dev_link - Add a link to the upper device
* @dev: device
* @upper_dev: new upper device
+ * @extack: netlink extended ack
*
* Adds a link to device which is upper to this one. The caller must hold
* the RTNL lock. On a failure a negative errno code is returned.
@@ -6369,6 +6457,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
* @upper_dev: new upper device
* @upper_priv: upper device private
* @upper_info: upper info to be passed down via notifier
+ * @extack: netlink extended ack
*
* Adds a link to device which is upper to this one. In this case, only
* one master upper device can be linked, although other non-master devices
@@ -6959,6 +7048,35 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
EXPORT_SYMBOL(dev_set_mtu);
/**
+ * dev_change_tx_queue_len - Change TX queue length of a netdevice
+ * @dev: device
+ * @new_len: new tx queue length
+ */
+int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
+{
+ unsigned int orig_len = dev->tx_queue_len;
+ int res;
+
+ if (new_len != (unsigned int)new_len)
+ return -ERANGE;
+
+ if (new_len != orig_len) {
+ dev->tx_queue_len = new_len;
+ res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
+ res = notifier_to_errno(res);
+ if (res) {
+ netdev_err(dev,
+ "refused to change device tx_queue_len\n");
+ dev->tx_queue_len = orig_len;
+ return res;
+ }
+ return dev_qdisc_change_tx_queue_len(dev);
+ }
+
+ return 0;
+}
+
+/**
* dev_set_group - Change group this device belongs to
* @dev: device
* @new_group: group this device should belong to
@@ -7073,17 +7191,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down);
-u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
+void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
+ struct netdev_bpf *xdp)
{
- struct netdev_bpf xdp;
-
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
+ memset(xdp, 0, sizeof(*xdp));
+ xdp->command = XDP_QUERY_PROG;
/* Query must always succeed. */
- WARN_ON(bpf_op(dev, &xdp) < 0);
- if (prog_id)
- *prog_id = xdp.prog_id;
+ WARN_ON(bpf_op(dev, xdp) < 0);
+}
+
+static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
+{
+ struct netdev_bpf xdp;
+
+ __dev_xdp_query(dev, bpf_op, &xdp);
return xdp.prog_attached;
}
@@ -7106,6 +7228,27 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
return bpf_op(dev, &xdp);
}
+static void dev_xdp_uninstall(struct net_device *dev)
+{
+ struct netdev_bpf xdp;
+ bpf_op_t ndo_bpf;
+
+ /* Remove generic XDP */
+ WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
+
+ /* Remove from the driver */
+ ndo_bpf = dev->netdev_ops->ndo_bpf;
+ if (!ndo_bpf)
+ return;
+
+ __dev_xdp_query(dev, ndo_bpf, &xdp);
+ if (xdp.prog_attached == XDP_ATTACHED_NONE)
+ return;
+
+ /* Program removal should always succeed */
+ WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL));
+}
+
/**
* dev_change_xdp_fd - set or clear a bpf program for a device rx path
* @dev: device
@@ -7134,10 +7277,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
bpf_chk = generic_xdp_install;
if (fd >= 0) {
- if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
+ if (bpf_chk && __dev_xdp_attached(dev, bpf_chk))
return -EEXIST;
if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
- __dev_xdp_attached(dev, bpf_op, NULL))
+ __dev_xdp_attached(dev, bpf_op))
return -EBUSY;
prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
@@ -7236,6 +7379,7 @@ static void rollback_registered_many(struct list_head *head)
/* Shutdown queueing discipline. */
dev_shutdown(dev);
+ dev_xdp_uninstall(dev);
/* Notify protocols, that we are about to destroy
* this device. They should clean all the things.
@@ -7245,7 +7389,7 @@ static void rollback_registered_many(struct list_head *head)
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
- GFP_KERNEL, NULL);
+ GFP_KERNEL, NULL, 0);
/*
* Flush the unicast and multicast chains
@@ -7379,6 +7523,18 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~dev->gso_partial_features;
}
+ if (!(features & NETIF_F_RXCSUM)) {
+ /* NETIF_F_GRO_HW implies doing RXCSUM since every packet
+ * successfully merged by hardware must also have the
+ * checksum verified by hardware. If the user does not
+ * want to enable RXCSUM, logically, we should disable GRO_HW.
+ */
+ if (features & NETIF_F_GRO_HW) {
+ netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
return features;
}
@@ -7512,12 +7668,12 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);
-#ifdef CONFIG_SYSFS
static int netif_alloc_rx_queues(struct net_device *dev)
{
unsigned int i, count = dev->num_rx_queues;
struct netdev_rx_queue *rx;
size_t sz = count * sizeof(*rx);
+ int err = 0;
BUG_ON(count < 1);
@@ -7527,11 +7683,38 @@ static int netif_alloc_rx_queues(struct net_device *dev)
dev->_rx = rx;
- for (i = 0; i < count; i++)
+ for (i = 0; i < count; i++) {
rx[i].dev = dev;
+
+ /* XDP RX-queue setup */
+ err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i);
+ if (err < 0)
+ goto err_rxq_info;
+ }
return 0;
+
+err_rxq_info:
+ /* Rollback successful reg's and free other resources */
+ while (i--)
+ xdp_rxq_info_unreg(&rx[i].xdp_rxq);
+ kvfree(dev->_rx);
+ dev->_rx = NULL;
+ return err;
+}
+
+static void netif_free_rx_queues(struct net_device *dev)
+{
+ unsigned int i, count = dev->num_rx_queues;
+
+ /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */
+ if (!dev->_rx)
+ return;
+
+ for (i = 0; i < count; i++)
+ xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq);
+
+ kvfree(dev->_rx);
}
-#endif
static void netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue, void *_unused)
@@ -8092,12 +8275,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
return NULL;
}
-#ifdef CONFIG_SYSFS
if (rxqs < 1) {
pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
return NULL;
}
-#endif
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
@@ -8154,12 +8335,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (netif_alloc_netdev_queues(dev))
goto free_all;
-#ifdef CONFIG_SYSFS
dev->num_rx_queues = rxqs;
dev->real_num_rx_queues = rxqs;
if (netif_alloc_rx_queues(dev))
goto free_all;
-#endif
strcpy(dev->name, name);
dev->name_assign_type = name_assign_type;
@@ -8195,13 +8374,10 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
- struct bpf_prog *prog;
might_sleep();
netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
- kvfree(dev->_rx);
-#endif
+ netif_free_rx_queues(dev);
kfree(rcu_dereference_protected(dev->ingress_queue, 1));
@@ -8214,12 +8390,6 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
- prog = rcu_dereference_protected(dev->xdp_prog, 1);
- if (prog) {
- bpf_prog_put(prog);
- static_key_slow_dec(&generic_xdp_needed);
- }
-
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
netdev_freemem(dev);
@@ -8332,7 +8502,7 @@ EXPORT_SYMBOL(unregister_netdev);
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
- int err, new_nsid;
+ int err, new_nsid, new_ifindex;
ASSERT_RTNL();
@@ -8388,11 +8558,16 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
- new_nsid = peernet2id_alloc(dev_net(dev), net);
+
+ new_nsid = peernet2id_alloc(dev_net(dev), net);
+ /* If there is an ifindex conflict assign a new one */
+ if (__dev_get_by_index(net, dev->ifindex))
+ new_ifindex = dev_new_index(net);
else
- new_nsid = peernet2id(dev_net(dev), net);
- rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
+ new_ifindex = dev->ifindex;
+
+ rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
+ new_ifindex);
/*
* Flush the unicast and multicast chains
@@ -8406,10 +8581,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/* Actually switch the network namespace */
dev_net_set(dev, net);
-
- /* If there is an ifindex conflict assign a new one */
- if (__dev_get_by_index(net, dev->ifindex))
- dev->ifindex = dev_new_index(net);
+ dev->ifindex = new_ifindex;
/* Send a netdev-add uevent to the new namespace */
kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
@@ -8807,6 +8979,9 @@ static int __init net_dev_init(void)
skb_queue_head_init(&sd->input_pkt_queue);
skb_queue_head_init(&sd->process_queue);
+#ifdef CONFIG_XFRM_OFFLOAD
+ skb_queue_head_init(&sd->xfrm_backlog);
+#endif
INIT_LIST_HEAD(&sd->poll_list);
sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 7e690d0ccd05..0ab1af04296c 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -18,26 +18,10 @@
* match. --pb
*/
-static int dev_ifname(struct net *net, struct ifreq __user *arg)
+static int dev_ifname(struct net *net, struct ifreq *ifr)
{
- struct ifreq ifr;
- int error;
-
- /*
- * Fetch the caller's info block.
- */
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- return -EFAULT;
- ifr.ifr_name[IFNAMSIZ-1] = 0;
-
- error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
- if (error)
- return error;
-
- if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
- return -EFAULT;
- return 0;
+ ifr->ifr_name[IFNAMSIZ-1] = 0;
+ return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex);
}
static gifconf_func_t *gifconf_list[NPROTO];
@@ -66,9 +50,8 @@ EXPORT_SYMBOL(register_gifconf);
* Thus we will need a 'compatibility mode'.
*/
-static int dev_ifconf(struct net *net, char __user *arg)
+int dev_ifconf(struct net *net, struct ifconf *ifc, int size)
{
- struct ifconf ifc;
struct net_device *dev;
char __user *pos;
int len;
@@ -79,11 +62,8 @@ static int dev_ifconf(struct net *net, char __user *arg)
* Fetch the caller's info block.
*/
- if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
- return -EFAULT;
-
- pos = ifc.ifc_buf;
- len = ifc.ifc_len;
+ pos = ifc->ifc_buf;
+ len = ifc->ifc_len;
/*
* Loop over the interfaces, and write an info block for each.
@@ -95,10 +75,10 @@ static int dev_ifconf(struct net *net, char __user *arg)
if (gifconf_list[i]) {
int done;
if (!pos)
- done = gifconf_list[i](dev, NULL, 0);
+ done = gifconf_list[i](dev, NULL, 0, size);
else
done = gifconf_list[i](dev, pos + total,
- len - total);
+ len - total, size);
if (done < 0)
return -EFAULT;
total += done;
@@ -109,12 +89,12 @@ static int dev_ifconf(struct net *net, char __user *arg)
/*
* All done. Write the updated control block back to the caller.
*/
- ifc.ifc_len = total;
+ ifc->ifc_len = total;
/*
* Both BSD and Solaris return 0 here, so we do too.
*/
- return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
+ return 0;
}
/*
@@ -406,53 +386,24 @@ EXPORT_SYMBOL(dev_load);
* positive or a negative errno code on error.
*/
-int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout)
{
- struct ifreq ifr;
int ret;
char *colon;
- /* One special case: SIOCGIFCONF takes ifconf argument
- and requires shared lock, because it sleeps writing
- to user space.
- */
-
- if (cmd == SIOCGIFCONF) {
- rtnl_lock();
- ret = dev_ifconf(net, (char __user *) arg);
- rtnl_unlock();
- return ret;
- }
+ if (need_copyout)
+ *need_copyout = true;
if (cmd == SIOCGIFNAME)
- return dev_ifname(net, (struct ifreq __user *)arg);
-
- /*
- * Take care of Wireless Extensions. Unfortunately struct iwreq
- * isn't a proper subset of struct ifreq (it's 8 byte shorter)
- * so we need to treat it specially, otherwise applications may
- * fault if the struct they're passing happens to land at the
- * end of a mapped page.
- */
- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
- struct iwreq iwr;
-
- if (copy_from_user(&iwr, arg, sizeof(iwr)))
- return -EFAULT;
-
- iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
+ return dev_ifname(net, ifr);
- return wext_handle_ioctl(net, &iwr, cmd, arg);
- }
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- return -EFAULT;
-
- ifr.ifr_name[IFNAMSIZ-1] = 0;
+ ifr->ifr_name[IFNAMSIZ-1] = 0;
- colon = strchr(ifr.ifr_name, ':');
+ colon = strchr(ifr->ifr_name, ':');
if (colon)
*colon = 0;
+ dev_load(net, ifr->ifr_name);
+
/*
* See which interface the caller is talking about.
*/
@@ -472,31 +423,19 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCGIFMAP:
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
- dev_load(net, ifr.ifr_name);
rcu_read_lock();
- ret = dev_ifsioc_locked(net, &ifr, cmd);
+ ret = dev_ifsioc_locked(net, ifr, cmd);
rcu_read_unlock();
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- }
+ if (colon)
+ *colon = ':';
return ret;
case SIOCETHTOOL:
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ethtool(net, &ifr);
+ ret = dev_ethtool(net, ifr);
rtnl_unlock();
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- }
+ if (colon)
+ *colon = ':';
return ret;
/*
@@ -510,17 +449,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSIFNAME:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, &ifr, cmd);
+ ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- }
+ if (colon)
+ *colon = ':';
return ret;
/*
@@ -561,10 +494,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
/* fall through */
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, &ifr, cmd);
+ ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
+ if (need_copyout)
+ *need_copyout = false;
return ret;
case SIOCGIFMEM:
@@ -584,13 +518,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
cmd == SIOCGHWTSTAMP ||
(cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15)) {
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, &ifr, cmd);
+ ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
- if (!ret && copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
return ret;
}
return -ENOTTY;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 7d430c1d9c3e..18d385ed8237 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -92,12 +92,6 @@ static LIST_HEAD(devlink_list);
*/
static DEFINE_MUTEX(devlink_mutex);
-/* devlink_port_mutex
- *
- * Shared lock to guard lists of ports in all devlink devices.
- */
-static DEFINE_MUTEX(devlink_port_mutex);
-
static struct net *devlink_net(const struct devlink *devlink)
{
return read_pnet(&devlink->_net);
@@ -335,15 +329,18 @@ devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0)
#define DEVLINK_NL_FLAG_NEED_PORT BIT(1)
#define DEVLINK_NL_FLAG_NEED_SB BIT(2)
-#define DEVLINK_NL_FLAG_LOCK_PORTS BIT(3)
- /* port is not needed but we need to ensure they don't
- * change in the middle of command
- */
+
+/* The per devlink instance lock is taken by default in the pre-doit
+ * operation, yet several commands do not require this. The global
+ * devlink lock is taken and protects from disruption by user-calls.
+ */
+#define DEVLINK_NL_FLAG_NO_LOCK BIT(3)
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink;
+ int err;
mutex_lock(&devlink_mutex);
devlink = devlink_get_from_info(info);
@@ -351,44 +348,47 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
mutex_unlock(&devlink_mutex);
return PTR_ERR(devlink);
}
+ if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+ mutex_lock(&devlink->lock);
if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
info->user_ptr[0] = devlink;
} else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
struct devlink_port *devlink_port;
- mutex_lock(&devlink_port_mutex);
devlink_port = devlink_port_get_from_info(devlink, info);
if (IS_ERR(devlink_port)) {
- mutex_unlock(&devlink_port_mutex);
- mutex_unlock(&devlink_mutex);
- return PTR_ERR(devlink_port);
+ err = PTR_ERR(devlink_port);
+ goto unlock;
}
info->user_ptr[0] = devlink_port;
}
- if (ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS) {
- mutex_lock(&devlink_port_mutex);
- }
if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) {
struct devlink_sb *devlink_sb;
devlink_sb = devlink_sb_get_from_info(devlink, info);
if (IS_ERR(devlink_sb)) {
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT)
- mutex_unlock(&devlink_port_mutex);
- mutex_unlock(&devlink_mutex);
- return PTR_ERR(devlink_sb);
+ err = PTR_ERR(devlink_sb);
+ goto unlock;
}
info->user_ptr[1] = devlink_sb;
}
return 0;
+
+unlock:
+ if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+ mutex_unlock(&devlink->lock);
+ mutex_unlock(&devlink_mutex);
+ return err;
}
static void devlink_nl_post_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT ||
- ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS)
- mutex_unlock(&devlink_port_mutex);
+ struct devlink *devlink;
+
+ devlink = devlink_get_from_info(info);
+ if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+ mutex_unlock(&devlink->lock);
mutex_unlock(&devlink_mutex);
}
@@ -614,10 +614,10 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
int err;
mutex_lock(&devlink_mutex);
- mutex_lock(&devlink_port_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
continue;
+ mutex_lock(&devlink->lock);
list_for_each_entry(devlink_port, &devlink->port_list, list) {
if (idx < start) {
idx++;
@@ -628,13 +628,15 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
- if (err)
+ if (err) {
+ mutex_unlock(&devlink->lock);
goto out;
+ }
idx++;
}
+ mutex_unlock(&devlink->lock);
}
out:
- mutex_unlock(&devlink_port_mutex);
mutex_unlock(&devlink_mutex);
cb->args[0] = idx;
@@ -801,6 +803,7 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
continue;
+ mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
if (idx < start) {
idx++;
@@ -811,10 +814,13 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
- if (err)
+ if (err) {
+ mutex_unlock(&devlink->lock);
goto out;
+ }
idx++;
}
+ mutex_unlock(&devlink->lock);
}
out:
mutex_unlock(&devlink_mutex);
@@ -935,14 +941,18 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
!devlink->ops || !devlink->ops->sb_pool_get)
continue;
+ mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
devlink_sb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq);
- if (err && err != -EOPNOTSUPP)
+ if (err && err != -EOPNOTSUPP) {
+ mutex_unlock(&devlink->lock);
goto out;
+ }
}
+ mutex_unlock(&devlink->lock);
}
out:
mutex_unlock(&devlink_mutex);
@@ -1123,22 +1133,24 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
int err;
mutex_lock(&devlink_mutex);
- mutex_lock(&devlink_port_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
!devlink->ops || !devlink->ops->sb_port_pool_get)
continue;
+ mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_port_pool_get_dumpit(msg, start, &idx,
devlink, devlink_sb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq);
- if (err && err != -EOPNOTSUPP)
+ if (err && err != -EOPNOTSUPP) {
+ mutex_unlock(&devlink->lock);
goto out;
+ }
}
+ mutex_unlock(&devlink->lock);
}
out:
- mutex_unlock(&devlink_port_mutex);
mutex_unlock(&devlink_mutex);
cb->args[0] = idx;
@@ -1347,23 +1359,26 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
int err;
mutex_lock(&devlink_mutex);
- mutex_lock(&devlink_port_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
!devlink->ops || !devlink->ops->sb_tc_pool_bind_get)
continue;
+
+ mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx,
devlink,
devlink_sb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq);
- if (err && err != -EOPNOTSUPP)
+ if (err && err != -EOPNOTSUPP) {
+ mutex_unlock(&devlink->lock);
goto out;
+ }
}
+ mutex_unlock(&devlink->lock);
}
out:
- mutex_unlock(&devlink_port_mutex);
mutex_unlock(&devlink_mutex);
cb->args[0] = idx;
@@ -1679,6 +1694,12 @@ static int devlink_dpipe_table_put(struct sk_buff *skb,
table->counters_enabled))
goto nla_put_failure;
+ if (table->resource_valid) {
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
+ table->resource_id, DEVLINK_ATTR_PAD);
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
+ table->resource_units, DEVLINK_ATTR_PAD);
+ }
if (devlink_dpipe_matches_put(table, skb))
goto nla_put_failure;
@@ -2273,6 +2294,273 @@ static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
counters_enable);
}
+static struct devlink_resource *
+devlink_resource_find(struct devlink *devlink,
+ struct devlink_resource *resource, u64 resource_id)
+{
+ struct list_head *resource_list;
+
+ if (resource)
+ resource_list = &resource->resource_list;
+ else
+ resource_list = &devlink->resource_list;
+
+ list_for_each_entry(resource, resource_list, list) {
+ struct devlink_resource *child_resource;
+
+ if (resource->id == resource_id)
+ return resource;
+
+ child_resource = devlink_resource_find(devlink, resource,
+ resource_id);
+ if (child_resource)
+ return child_resource;
+ }
+ return NULL;
+}
+
+static void
+devlink_resource_validate_children(struct devlink_resource *resource)
+{
+ struct devlink_resource *child_resource;
+ bool size_valid = true;
+ u64 parts_size = 0;
+
+ if (list_empty(&resource->resource_list))
+ goto out;
+
+ list_for_each_entry(child_resource, &resource->resource_list, list)
+ parts_size += child_resource->size_new;
+
+ if (parts_size > resource->size)
+ size_valid = false;
+out:
+ resource->size_valid = size_valid;
+}
+
+static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_resource *resource;
+ u64 resource_id;
+ u64 size;
+ int err;
+
+ if (!info->attrs[DEVLINK_ATTR_RESOURCE_ID] ||
+ !info->attrs[DEVLINK_ATTR_RESOURCE_SIZE])
+ return -EINVAL;
+ resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]);
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (!resource)
+ return -EINVAL;
+
+ if (!resource->resource_ops->size_validate)
+ return -EINVAL;
+
+ size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
+ err = resource->resource_ops->size_validate(devlink, size,
+ info->extack);
+ if (err)
+ return err;
+
+ resource->size_new = size;
+ devlink_resource_validate_children(resource);
+ if (resource->parent)
+ devlink_resource_validate_children(resource->parent);
+ return 0;
+}
+
+static void
+devlink_resource_size_params_put(struct devlink_resource *resource,
+ struct sk_buff *skb)
+{
+ struct devlink_resource_size_params *size_params;
+
+ size_params = resource->size_params;
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
+ size_params->size_granularity, DEVLINK_ATTR_PAD);
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
+ size_params->size_max, DEVLINK_ATTR_PAD);
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
+ size_params->size_min, DEVLINK_ATTR_PAD);
+ nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit);
+}
+
+static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
+ struct devlink_resource *resource)
+{
+ struct devlink_resource *child_resource;
+ struct nlattr *child_resource_attr;
+ struct nlattr *resource_attr;
+
+ resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE);
+ if (!resource_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size,
+ DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (resource->size != resource->size_new)
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
+ resource->size_new, DEVLINK_ATTR_PAD);
+ if (resource->resource_ops && resource->resource_ops->occ_get)
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
+ resource->resource_ops->occ_get(devlink),
+ DEVLINK_ATTR_PAD);
+ devlink_resource_size_params_put(resource, skb);
+ if (list_empty(&resource->resource_list))
+ goto out;
+
+ if (nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_SIZE_VALID,
+ resource->size_valid))
+ goto nla_put_failure;
+
+ child_resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST);
+ if (!child_resource_attr)
+ goto nla_put_failure;
+
+ list_for_each_entry(child_resource, &resource->resource_list, list) {
+ if (devlink_resource_put(devlink, skb, child_resource))
+ goto resource_put_failure;
+ }
+
+ nla_nest_end(skb, child_resource_attr);
+out:
+ nla_nest_end(skb, resource_attr);
+ return 0;
+
+resource_put_failure:
+ nla_nest_cancel(skb, child_resource_attr);
+nla_put_failure:
+ nla_nest_cancel(skb, resource_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_resource_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_resource *resource;
+ struct nlattr *resources_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ bool incomplete;
+ void *hdr;
+ int i;
+ int err;
+
+ resource = list_first_entry(&devlink->resource_list,
+ struct devlink_resource, list);
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr) {
+ nlmsg_free(skb);
+ return -EMSGSIZE;
+ }
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+
+ resources_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST);
+ if (!resources_attr)
+ goto nla_put_failure;
+
+ incomplete = false;
+ i = 0;
+ list_for_each_entry_from(resource, &devlink->resource_list, list) {
+ err = devlink_resource_put(devlink, skb, resource);
+ if (err) {
+ if (!i)
+ goto err_resource_put;
+ incomplete = true;
+ break;
+ }
+ i++;
+ }
+ nla_nest_end(skb, resources_attr);
+ genlmsg_end(skb, hdr);
+ if (incomplete)
+ goto start_again;
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_resource_put:
+err_skb_send_alloc:
+ genlmsg_cancel(skb, hdr);
+ nlmsg_free(skb);
+ return err;
+}
+
+static int devlink_nl_cmd_resource_dump(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (list_empty(&devlink->resource_list))
+ return -EOPNOTSUPP;
+
+ return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
+}
+
+static int
+devlink_resources_validate(struct devlink *devlink,
+ struct devlink_resource *resource,
+ struct genl_info *info)
+{
+ struct list_head *resource_list;
+ int err = 0;
+
+ if (resource)
+ resource_list = &resource->resource_list;
+ else
+ resource_list = &devlink->resource_list;
+
+ list_for_each_entry(resource, resource_list, list) {
+ if (!resource->size_valid)
+ return -EINVAL;
+ err = devlink_resources_validate(devlink, resource, info);
+ if (err)
+ return err;
+ }
+ return err;
+}
+
+static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ int err;
+
+ if (!devlink->ops->reload)
+ return -EOPNOTSUPP;
+
+ err = devlink_resources_validate(devlink, NULL, info);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
+ return err;
+ }
+ return devlink->ops->reload(devlink);
+}
+
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -2291,6 +2579,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
[DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64},
+ [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64},
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -2322,14 +2612,16 @@ static const struct genl_ops devlink_nl_ops[] = {
.doit = devlink_nl_cmd_port_split_doit,
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_PORT_UNSPLIT,
.doit = devlink_nl_cmd_port_unsplit_doit,
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_SB_GET,
@@ -2397,8 +2689,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB |
- DEVLINK_NL_FLAG_LOCK_PORTS,
+ DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
@@ -2406,8 +2697,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB |
- DEVLINK_NL_FLAG_LOCK_PORTS,
+ DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_ESWITCH_GET,
@@ -2451,6 +2741,28 @@ static const struct genl_ops devlink_nl_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
+ {
+ .cmd = DEVLINK_CMD_RESOURCE_SET,
+ .doit = devlink_nl_cmd_resource_set,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_RESOURCE_DUMP,
+ .doit = devlink_nl_cmd_resource_dump,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_RELOAD,
+ .doit = devlink_nl_cmd_reload,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -2488,6 +2800,8 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->sb_list);
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
+ INIT_LIST_HEAD(&devlink->resource_list);
+ mutex_init(&devlink->lock);
return devlink;
}
EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -2550,16 +2864,16 @@ int devlink_port_register(struct devlink *devlink,
struct devlink_port *devlink_port,
unsigned int port_index)
{
- mutex_lock(&devlink_port_mutex);
+ mutex_lock(&devlink->lock);
if (devlink_port_index_exists(devlink, port_index)) {
- mutex_unlock(&devlink_port_mutex);
+ mutex_unlock(&devlink->lock);
return -EEXIST;
}
devlink_port->devlink = devlink;
devlink_port->index = port_index;
devlink_port->registered = true;
list_add_tail(&devlink_port->list, &devlink->port_list);
- mutex_unlock(&devlink_port_mutex);
+ mutex_unlock(&devlink->lock);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
return 0;
}
@@ -2572,10 +2886,12 @@ EXPORT_SYMBOL_GPL(devlink_port_register);
*/
void devlink_port_unregister(struct devlink_port *devlink_port)
{
+ struct devlink *devlink = devlink_port->devlink;
+
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
- mutex_lock(&devlink_port_mutex);
+ mutex_lock(&devlink->lock);
list_del(&devlink_port->list);
- mutex_unlock(&devlink_port_mutex);
+ mutex_unlock(&devlink->lock);
}
EXPORT_SYMBOL_GPL(devlink_port_unregister);
@@ -2651,7 +2967,7 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
struct devlink_sb *devlink_sb;
int err = 0;
- mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink->lock);
if (devlink_sb_index_exists(devlink, sb_index)) {
err = -EEXIST;
goto unlock;
@@ -2670,7 +2986,7 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
devlink_sb->egress_tc_count = egress_tc_count;
list_add_tail(&devlink_sb->list, &devlink->sb_list);
unlock:
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
return err;
}
EXPORT_SYMBOL_GPL(devlink_sb_register);
@@ -2679,11 +2995,11 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
{
struct devlink_sb *devlink_sb;
- mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink->lock);
devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
WARN_ON(!devlink_sb);
list_del(&devlink_sb->list);
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
kfree(devlink_sb);
}
EXPORT_SYMBOL_GPL(devlink_sb_unregister);
@@ -2699,9 +3015,9 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister);
int devlink_dpipe_headers_register(struct devlink *devlink,
struct devlink_dpipe_headers *dpipe_headers)
{
- mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink->lock);
devlink->dpipe_headers = dpipe_headers;
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
return 0;
}
EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
@@ -2715,9 +3031,9 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
*/
void devlink_dpipe_headers_unregister(struct devlink *devlink)
{
- mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink->lock);
devlink->dpipe_headers = NULL;
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
}
EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister);
@@ -2783,9 +3099,9 @@ int devlink_dpipe_table_register(struct devlink *devlink,
table->priv = priv;
table->counter_control_extern = counter_control_extern;
- mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink->lock);
list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
return 0;
}
EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
@@ -2801,20 +3117,182 @@ void devlink_dpipe_table_unregister(struct devlink *devlink,
{
struct devlink_dpipe_table *table;
- mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
table_name);
if (!table)
goto unlock;
list_del_rcu(&table->list);
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
kfree_rcu(table, rcu);
return;
unlock:
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
}
EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
+/**
+ * devlink_resource_register - devlink resource register
+ *
+ * @devlink: devlink
+ * @resource_name: resource's name
+ * @top_hierarchy: top hierarchy
+ * @reload_required: reload is required for new configuration to
+ * apply
+ * @resource_size: resource's size
+ * @resource_id: resource's id
+ * @parent_reosurce_id: resource's parent id
+ * @size params: size parameters
+ * @resource_ops: resource ops
+ */
+int devlink_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ bool top_hierarchy,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ struct devlink_resource_size_params *size_params,
+ const struct devlink_resource_ops *resource_ops)
+{
+ struct devlink_resource *resource;
+ struct list_head *resource_list;
+ int err = 0;
+
+ mutex_lock(&devlink->lock);
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (resource) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ resource = kzalloc(sizeof(*resource), GFP_KERNEL);
+ if (!resource) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (top_hierarchy) {
+ resource_list = &devlink->resource_list;
+ } else {
+ struct devlink_resource *parent_resource;
+
+ parent_resource = devlink_resource_find(devlink, NULL,
+ parent_resource_id);
+ if (parent_resource) {
+ resource_list = &parent_resource->resource_list;
+ resource->parent = parent_resource;
+ } else {
+ kfree(resource);
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ resource->name = resource_name;
+ resource->size = resource_size;
+ resource->size_new = resource_size;
+ resource->id = resource_id;
+ resource->resource_ops = resource_ops;
+ resource->size_valid = true;
+ resource->size_params = size_params;
+ INIT_LIST_HEAD(&resource->resource_list);
+ list_add_tail(&resource->list, resource_list);
+out:
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_resource_register);
+
+/**
+ * devlink_resources_unregister - free all resources
+ *
+ * @devlink: devlink
+ * @resource: resource
+ */
+void devlink_resources_unregister(struct devlink *devlink,
+ struct devlink_resource *resource)
+{
+ struct devlink_resource *tmp, *child_resource;
+ struct list_head *resource_list;
+
+ if (resource)
+ resource_list = &resource->resource_list;
+ else
+ resource_list = &devlink->resource_list;
+
+ if (!resource)
+ mutex_lock(&devlink->lock);
+
+ list_for_each_entry_safe(child_resource, tmp, resource_list, list) {
+ devlink_resources_unregister(devlink, child_resource);
+ list_del(&child_resource->list);
+ kfree(child_resource);
+ }
+
+ if (!resource)
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_resources_unregister);
+
+/**
+ * devlink_resource_size_get - get and update size
+ *
+ * @devlink: devlink
+ * @resource_id: the requested resource id
+ * @p_resource_size: ptr to update
+ */
+int devlink_resource_size_get(struct devlink *devlink,
+ u64 resource_id,
+ u64 *p_resource_size)
+{
+ struct devlink_resource *resource;
+ int err = 0;
+
+ mutex_lock(&devlink->lock);
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (!resource) {
+ err = -EINVAL;
+ goto out;
+ }
+ *p_resource_size = resource->size_new;
+ resource->size = resource->size_new;
+out:
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_resource_size_get);
+
+/**
+ * devlink_dpipe_table_resource_set - set the resource id
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ * @resource_id: resource id
+ * @resource_units: number of resource's units consumed per table's entry
+ */
+int devlink_dpipe_table_resource_set(struct devlink *devlink,
+ const char *table_name, u64 resource_id,
+ u64 resource_units)
+{
+ struct devlink_dpipe_table *table;
+ int err = 0;
+
+ mutex_lock(&devlink->lock);
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table) {
+ err = -EINVAL;
+ goto out;
+ }
+ table->resource_id = resource_id;
+ table->resource_units = resource_units;
+ table->resource_valid = true;
+out:
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set);
+
static int __init devlink_module_init(void)
{
return genl_register_family(&devlink_nl_family);
diff --git a/net/core/dst.c b/net/core/dst.c
index 662a2d4a3d19..007aa0b08291 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -21,6 +21,7 @@
#include <linux/sched.h>
#include <linux/prefetch.h>
#include <net/lwtunnel.h>
+#include <net/xfrm.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
@@ -62,15 +63,12 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
struct net_device *dev, int initial_ref, int initial_obsolete,
unsigned short flags)
{
- dst->child = NULL;
dst->dev = dev;
if (dev)
dev_hold(dev);
dst->ops = ops;
dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
- dst->path = dst;
- dst->from = NULL;
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
#endif
@@ -88,7 +86,6 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->__use = 0;
dst->lastuse = jiffies;
dst->flags = flags;
- dst->next = NULL;
if (!(flags & DST_NOCOUNT))
dst_entries_add(ops, 1);
}
@@ -116,12 +113,17 @@ EXPORT_SYMBOL(dst_alloc);
struct dst_entry *dst_destroy(struct dst_entry * dst)
{
- struct dst_entry *child;
+ struct dst_entry *child = NULL;
smp_rmb();
- child = dst->child;
+#ifdef CONFIG_XFRM
+ if (dst->xfrm) {
+ struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
+ child = xdst->child;
+ }
+#endif
if (!(dst->flags & DST_NOCOUNT))
dst_entries_add(dst->ops, -1);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f8fcf450a36e..107b122c8969 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -73,6 +73,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_LLTX_BIT] = "tx-lockless",
[NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
[NETIF_F_GRO_BIT] = "rx-gro",
+ [NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
[NETIF_F_LRO_BIT] = "rx-lro",
[NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
@@ -770,15 +771,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
}
-static void
-warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
-{
- char name[sizeof(current->comm)];
-
- pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
- get_task_comm(name, current), details);
-}
-
/* Query device for its ethtool_cmd settings.
*
* Backward compatibility note: for compatibility with legacy ethtool,
@@ -805,10 +797,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
&link_ksettings);
if (err < 0)
return err;
- if (!convert_link_ksettings_to_legacy_settings(&cmd,
- &link_ksettings))
- warn_incomplete_ethtool_legacy_settings_conversion(
- "link modes are only partially reported");
+ convert_link_ksettings_to_legacy_settings(&cmd,
+ &link_ksettings);
/* send a sensible cmd tag back to user */
cmd.cmd = ETHTOOL_GSET;
@@ -1703,14 +1693,23 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
{
- struct ethtool_ringparam ringparam;
+ struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM };
- if (!dev->ethtool_ops->set_ringparam)
+ if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam)
return -EOPNOTSUPP;
if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
return -EFAULT;
+ dev->ethtool_ops->get_ringparam(dev, &max);
+
+ /* ensure new ring parameters are within the maximums */
+ if (ringparam.rx_pending > max.rx_max_pending ||
+ ringparam.rx_mini_pending > max.rx_mini_max_pending ||
+ ringparam.rx_jumbo_pending > max.rx_jumbo_max_pending ||
+ ringparam.tx_pending > max.tx_max_pending)
+ return -EINVAL;
+
return dev->ethtool_ops->set_ringparam(dev, &ringparam);
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 6a85e67fafce..08ab4c65a998 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -401,8 +401,8 @@ do_pass:
/* Classic BPF expects A and X to be reset first. These need
* to be guaranteed to be the first two instructions.
*/
- *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
- *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
+ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
+ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
/* All programs must keep CTX in callee saved BPF_REG_CTX.
* In eBPF case it's done by the compiler, here we need to
@@ -458,6 +458,17 @@ do_pass:
convert_bpf_extensions(fp, &insn))
break;
+ if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
+ fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
+ *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
+ /* Error with exception code on div/mod by 0.
+ * For cBPF programs, this was always return 0.
+ */
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
+ *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
+ *insn++ = BPF_EXIT_INSN();
+ }
+
*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
break;
@@ -1054,11 +1065,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
*/
goto out_err_free;
- /* We are guaranteed to never error here with cBPF to eBPF
- * transitions, since there's no issue with type compatibility
- * checks on program arrays.
- */
fp = bpf_prog_select_runtime(fp, &err);
+ if (err)
+ goto out_err_free;
kfree(old_prog);
return fp;
@@ -2684,8 +2693,9 @@ static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
return 0;
}
-int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *xdp_prog)
+static int xdp_do_generic_redirect_map(struct net_device *dev,
+ struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
unsigned long map_owner = ri->map_owner;
@@ -2862,7 +2872,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM,
- .arg5_type = ARG_CONST_SIZE,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
static unsigned short bpf_tunnel_key_af(u64 flags)
@@ -3013,6 +3023,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
if (flags & BPF_F_DONT_FRAGMENT)
info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
+ if (flags & BPF_F_ZERO_CSUM_TX)
+ info->key.tun_flags &= ~TUNNEL_CSUM;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
@@ -3026,8 +3038,6 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
IPV6_FLOWLABEL_MASK;
} else {
info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
- if (flags & BPF_F_ZERO_CSUM_TX)
- info->key.tun_flags &= ~TUNNEL_CSUM;
}
return 0;
@@ -3151,7 +3161,7 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM,
- .arg5_type = ARG_CONST_SIZE,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
@@ -3229,6 +3239,29 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
ret = -EINVAL;
}
#ifdef CONFIG_INET
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (level == SOL_IPV6) {
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
+ return -EINVAL;
+
+ val = *((int *)optval);
+ /* Only some options are supported */
+ switch (optname) {
+ case IPV6_TCLASS:
+ if (val < -1 || val > 0xff) {
+ ret = -EINVAL;
+ } else {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (val == -1)
+ val = 0;
+ np->tclass = val;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ }
+#endif
} else if (level == SOL_TCP &&
sk->sk_prot->setsockopt == tcp_setsockopt) {
if (optname == TCP_CONGESTION) {
@@ -3238,7 +3271,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
strncpy(name, optval, min_t(long, optlen,
TCP_CA_NAME_MAX-1));
name[TCP_CA_NAME_MAX-1] = 0;
- ret = tcp_set_congestion_control(sk, name, false, reinit);
+ ret = tcp_set_congestion_control(sk, name, false,
+ reinit);
} else {
struct tcp_sock *tp = tcp_sk(sk);
@@ -3304,6 +3338,22 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
} else {
goto err_clear;
}
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (level == SOL_IPV6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
+ goto err_clear;
+
+ /* Only some options are supported */
+ switch (optname) {
+ case IPV6_TCLASS:
+ *((int *)optval) = (int)np->tclass;
+ break;
+ default:
+ goto err_clear;
+ }
+#endif
} else {
goto err_clear;
}
@@ -3325,6 +3375,33 @@ static const struct bpf_func_proto bpf_getsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
+ int, argval)
+{
+ struct sock *sk = bpf_sock->sk;
+ int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
+
+ if (!sk_fullsock(sk))
+ return -EINVAL;
+
+#ifdef CONFIG_INET
+ if (val)
+ tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
+
+ return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
+#else
+ return -EINVAL;
+#endif
+}
+
+static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
+ .func = bpf_sock_ops_cb_flags_set,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3457,6 +3534,8 @@ xdp_func_proto(enum bpf_func_id func_id)
return &bpf_xdp_event_output_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
+ case BPF_FUNC_csum_diff:
+ return &bpf_csum_diff_proto;
case BPF_FUNC_xdp_adjust_head:
return &bpf_xdp_adjust_head_proto;
case BPF_FUNC_xdp_adjust_meta:
@@ -3505,6 +3584,8 @@ static const struct bpf_func_proto *
return &bpf_setsockopt_proto;
case BPF_FUNC_getsockopt:
return &bpf_getsockopt_proto;
+ case BPF_FUNC_sock_ops_cb_flags_set:
+ return &bpf_sock_ops_cb_flags_set_proto;
case BPF_FUNC_sock_map_update:
return &bpf_sock_map_update_proto;
default:
@@ -3821,34 +3902,44 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
-static bool __is_valid_sock_ops_access(int off, int size)
+static bool sock_ops_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
{
+ const int size_default = sizeof(__u32);
+
if (off < 0 || off >= sizeof(struct bpf_sock_ops))
return false;
+
/* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
- if (size != sizeof(__u32))
- return false;
- return true;
-}
-
-static bool sock_ops_is_valid_access(int off, int size,
- enum bpf_access_type type,
- struct bpf_insn_access_aux *info)
-{
if (type == BPF_WRITE) {
switch (off) {
- case offsetof(struct bpf_sock_ops, op) ...
- offsetof(struct bpf_sock_ops, replylong[3]):
+ case offsetof(struct bpf_sock_ops, reply):
+ case offsetof(struct bpf_sock_ops, sk_txhash):
+ if (size != size_default)
+ return false;
break;
default:
return false;
}
+ } else {
+ switch (off) {
+ case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
+ bytes_acked):
+ if (size != sizeof(__u64))
+ return false;
+ break;
+ default:
+ if (size != size_default)
+ return false;
+ break;
+ }
}
- return __is_valid_sock_ops_access(off, size);
+ return true;
}
static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
@@ -4303,6 +4394,24 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, data_end));
break;
+ case offsetof(struct xdp_md, ingress_ifindex):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, rxq));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct xdp_rxq_info, dev));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct net_device, ifindex));
+ break;
+ case offsetof(struct xdp_md, rx_queue_index):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, rxq));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct xdp_rxq_info,
+ queue_index));
+ break;
}
return insn - insn_buf;
@@ -4437,6 +4546,211 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
offsetof(struct sock_common, skc_num));
break;
+
+ case offsetof(struct bpf_sock_ops, is_fullsock):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern,
+ is_fullsock),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ is_fullsock));
+ break;
+
+ case offsetof(struct bpf_sock_ops, state):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_state));
+ break;
+
+ case offsetof(struct bpf_sock_ops, rtt_min):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+ sizeof(struct minmax));
+ BUILD_BUG_ON(sizeof(struct minmax) <
+ sizeof(struct minmax_sample));
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct tcp_sock, rtt_min) +
+ FIELD_SIZEOF(struct minmax_sample, t));
+ break;
+
+/* Helper macro for adding read access to tcp_sock or sock fields. */
+#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
+ do { \
+ BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
+ FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, \
+ is_fullsock), \
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ is_fullsock)); \
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, sk),\
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sock_ops_kern, sk));\
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
+ OBJ_FIELD), \
+ si->dst_reg, si->dst_reg, \
+ offsetof(OBJ, OBJ_FIELD)); \
+ } while (0)
+
+/* Helper macro for adding write access to tcp_sock or sock fields.
+ * The macro is called with two registers, dst_reg which contains a pointer
+ * to ctx (context) and src_reg which contains the value that should be
+ * stored. However, we need an additional register since we cannot overwrite
+ * dst_reg because it may be used later in the program.
+ * Instead we "borrow" one of the other register. We first save its value
+ * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
+ * it at the end of the macro.
+ */
+#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
+ do { \
+ int reg = BPF_REG_9; \
+ BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
+ FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ if (si->dst_reg == reg || si->src_reg == reg) \
+ reg--; \
+ if (si->dst_reg == reg || si->src_reg == reg) \
+ reg--; \
+ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ temp)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, \
+ is_fullsock), \
+ reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ is_fullsock)); \
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, sk),\
+ reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, sk));\
+ *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
+ reg, si->src_reg, \
+ offsetof(OBJ, OBJ_FIELD)); \
+ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ temp)); \
+ } while (0)
+
+#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
+ do { \
+ if (TYPE == BPF_WRITE) \
+ SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
+ else \
+ SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
+ } while (0)
+
+ case offsetof(struct bpf_sock_ops, snd_cwnd):
+ SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, srtt_us):
+ SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
+ SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, snd_ssthresh):
+ SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, rcv_nxt):
+ SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, snd_nxt):
+ SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, snd_una):
+ SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, mss_cache):
+ SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, ecn_flags):
+ SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, rate_delivered):
+ SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, rate_interval_us):
+ SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, packets_out):
+ SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, retrans_out):
+ SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, total_retrans):
+ SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, segs_in):
+ SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, data_segs_in):
+ SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, segs_out):
+ SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, data_segs_out):
+ SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, lost_out):
+ SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, sacked_out):
+ SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, sk_txhash):
+ SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
+ struct sock, type);
+ break;
+
+ case offsetof(struct bpf_sock_ops, bytes_received):
+ SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, bytes_acked):
+ SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
+ break;
+
}
return insn - insn_buf;
}
@@ -4473,6 +4787,7 @@ const struct bpf_verifier_ops sk_filter_verifier_ops = {
};
const struct bpf_prog_ops sk_filter_prog_ops = {
+ .test_run = bpf_prog_test_run_skb,
};
const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 15ce30063765..559db9ea8d86 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -24,6 +24,7 @@
#include <linux/tcp.h>
#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
+#include <uapi/linux/batadv_packet.h>
static void dissector_set_key(struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
@@ -133,10 +134,10 @@ skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
ctrl->addr_type = type;
}
-static void
-__skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
- struct flow_dissector *flow_dissector,
- void *target_container)
+void
+skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
{
struct ip_tunnel_info *info;
struct ip_tunnel_key *key;
@@ -212,6 +213,7 @@ __skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
tp->dst = key->tp_dst;
}
}
+EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
@@ -436,6 +438,57 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
return FLOW_DISSECT_RET_PROTO_AGAIN;
}
+/**
+ * __skb_flow_dissect_batadv() - dissect batman-adv header
+ * @skb: sk_buff to with the batman-adv header
+ * @key_control: flow dissectors control key
+ * @data: raw buffer pointer to the packet, if NULL use skb->data
+ * @p_proto: pointer used to update the protocol to process next
+ * @p_nhoff: pointer used to update inner network header offset
+ * @hlen: packet header length
+ * @flags: any combination of FLOW_DISSECTOR_F_*
+ *
+ * ETH_P_BATMAN packets are tried to be dissected. Only
+ * &struct batadv_unicast packets are actually processed because they contain an
+ * inner ethernet header and are usually followed by actual network header. This
+ * allows the flow dissector to continue processing the packet.
+ *
+ * Return: FLOW_DISSECT_RET_PROTO_AGAIN when &struct batadv_unicast was found,
+ * FLOW_DISSECT_RET_OUT_GOOD when dissector should stop after encapsulation,
+ * otherwise FLOW_DISSECT_RET_OUT_BAD
+ */
+static enum flow_dissect_ret
+__skb_flow_dissect_batadv(const struct sk_buff *skb,
+ struct flow_dissector_key_control *key_control,
+ void *data, __be16 *p_proto, int *p_nhoff, int hlen,
+ unsigned int flags)
+{
+ struct {
+ struct batadv_unicast_packet batadv_unicast;
+ struct ethhdr eth;
+ } *hdr, _hdr;
+
+ hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), data, hlen,
+ &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (hdr->batadv_unicast.version != BATADV_COMPAT_VERSION)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (hdr->batadv_unicast.packet_type != BATADV_UNICAST)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ *p_proto = hdr->eth.h_proto;
+ *p_nhoff += sizeof(*hdr);
+
+ key_control->flags |= FLOW_DIS_ENCAPSULATION;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ return FLOW_DISSECT_RET_PROTO_AGAIN;
+}
+
static void
__skb_flow_dissect_tcp(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
@@ -576,9 +629,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
- __skb_flow_dissect_tunnel_info(skb, flow_dissector,
- target_container);
-
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct ethhdr *eth = eth_hdr(skb);
@@ -817,6 +867,11 @@ proto_again:
nhoff, hlen);
break;
+ case htons(ETH_P_BATMAN):
+ fdret = __skb_flow_dissect_batadv(skb, key_control, data,
+ &proto, &nhoff, hlen, flags);
+ break;
+
default:
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
@@ -976,8 +1031,8 @@ ip_proto_again:
out_good:
ret = true;
- key_control->thoff = (u16)nhoff;
out:
+ key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
key_basic->n_proto = proto;
key_basic->ip_proto = ip_proto;
@@ -985,7 +1040,6 @@ out:
out_bad:
ret = false;
- key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
goto out;
}
EXPORT_SYMBOL(__skb_flow_dissect);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 9834cfa21b21..0a3f88f08727 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -159,7 +159,11 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
est->intvl_log = intvl_log;
est->cpu_bstats = cpu_bstats;
+ if (stats_lock)
+ local_bh_disable();
est_fetch_counters(est, &b);
+ if (stats_lock)
+ local_bh_enable();
est->last_bytes = b.bytes;
est->last_packets = b.packets;
old = rcu_dereference_protected(*rate_est, 1);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 87f28557b329..b2b2323bdc84 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -252,10 +252,10 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
}
}
-static void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
- const struct gnet_stats_queue __percpu *cpu,
- const struct gnet_stats_queue *q,
- __u32 qlen)
+void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *cpu,
+ const struct gnet_stats_queue *q,
+ __u32 qlen)
{
if (cpu) {
__gnet_stats_copy_queue_cpu(qstats, cpu);
@@ -269,6 +269,7 @@ static void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
qstats->qlen = qlen;
}
+EXPORT_SYMBOL(__gnet_stats_copy_queue);
/**
* gnet_stats_copy_queue - copy queue statistics into statistics TLV
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 982861607f88..e38e641e98d5 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -92,7 +92,7 @@ static bool linkwatch_urgent_event(struct net_device *dev)
if (dev->ifindex != dev_get_iflink(dev))
return true;
- if (dev->priv_flags & IFF_TEAM_PORT)
+ if (netif_is_lag_port(dev) || netif_is_lag_master(dev))
return true;
return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d1f5fe986edd..7b7a14abba28 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -532,7 +532,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
+ hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
if (n->parms->dead) {
rc = ERR_PTR(-EINVAL);
@@ -544,7 +544,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
n1 != NULL;
n1 = rcu_dereference_protected(n1->next,
lockdep_is_held(&tbl->lock))) {
- if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
+ if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
if (want_ref)
neigh_hold(n1);
rc = n1;
@@ -2862,7 +2862,6 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file)
};
static const struct file_operations neigh_stat_seq_fops = {
- .owner = THIS_MODULE,
.open = neigh_stat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 615ccab55f38..e010bb800d7b 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -182,7 +182,6 @@ static int dev_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations dev_seq_fops = {
- .owner = THIS_MODULE,
.open = dev_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -202,7 +201,6 @@ static int softnet_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations softnet_seq_fops = {
- .owner = THIS_MODULE,
.open = softnet_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -306,7 +304,6 @@ static int ptype_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations ptype_seq_fops = {
- .owner = THIS_MODULE,
.open = ptype_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -387,7 +384,6 @@ static int dev_mc_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations dev_mc_seq_fops = {
- .owner = THIS_MODULE,
.open = dev_mc_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 799b75268291..60a5ad2c33ee 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -295,10 +295,31 @@ static ssize_t carrier_changes_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
return sprintf(buf, fmt_dec,
- atomic_read(&netdev->carrier_changes));
+ atomic_read(&netdev->carrier_up_count) +
+ atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_changes);
+static ssize_t carrier_up_count_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+
+ return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_up_count));
+}
+static DEVICE_ATTR_RO(carrier_up_count);
+
+static ssize_t carrier_down_count_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+
+ return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_down_count));
+}
+static DEVICE_ATTR_RO(carrier_down_count);
+
/* read-write attributes */
static int change_mtu(struct net_device *dev, unsigned long new_mtu)
@@ -325,29 +346,6 @@ static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
}
NETDEVICE_SHOW_RW(flags, fmt_hex);
-static int change_tx_queue_len(struct net_device *dev, unsigned long new_len)
-{
- unsigned int orig_len = dev->tx_queue_len;
- int res;
-
- if (new_len != (unsigned int)new_len)
- return -ERANGE;
-
- if (new_len != orig_len) {
- dev->tx_queue_len = new_len;
- res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
- res = notifier_to_errno(res);
- if (res) {
- netdev_err(dev,
- "refused to change device tx_queue_len\n");
- dev->tx_queue_len = orig_len;
- return -EFAULT;
- }
- }
-
- return 0;
-}
-
static ssize_t tx_queue_len_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
@@ -355,7 +353,7 @@ static ssize_t tx_queue_len_store(struct device *dev,
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- return netdev_store(dev, attr, buf, len, change_tx_queue_len);
+ return netdev_store(dev, attr, buf, len, dev_change_tx_queue_len);
}
NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
@@ -547,6 +545,8 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_phys_port_name.attr,
&dev_attr_phys_switch_id.attr,
&dev_attr_proto_down.attr,
+ &dev_attr_carrier_up_count.attr,
+ &dev_attr_carrier_down_count.attr,
NULL,
};
ATTRIBUTE_GROUPS(net_class);
@@ -961,7 +961,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
while (--i >= new_num) {
struct kobject *kobj = &dev->_rx[i].kobj;
- if (!atomic_read(&dev_net(dev)->count))
+ if (!refcount_read(&dev_net(dev)->count))
kobj->uevent_suppress = 1;
if (dev->sysfs_rx_queue_group)
sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
@@ -1367,7 +1367,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
while (--i >= new_num) {
struct netdev_queue *queue = dev->_tx + i;
- if (!atomic_read(&dev_net(dev)->count))
+ if (!refcount_read(&dev_net(dev)->count))
queue->kobj.uevent_suppress = 1;
#ifdef CONFIG_BQL
sysfs_remove_group(&queue->kobj, &dql_group);
@@ -1558,7 +1558,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
{
struct device *dev = &ndev->dev;
- if (!atomic_read(&dev_net(ndev)->count))
+ if (!refcount_read(&dev_net(ndev)->count))
dev_set_uevent_suppress(dev, 1);
kobject_get(&dev->kobj);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b797832565d3..3cad5f51afd3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -35,7 +35,7 @@ LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
struct net init_net = {
- .count = ATOMIC_INIT(1),
+ .count = REFCOUNT_INIT(1),
.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);
@@ -221,17 +221,26 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id);
*/
int peernet2id_alloc(struct net *net, struct net *peer)
{
- bool alloc;
+ bool alloc = false, alive = false;
int id;
- if (atomic_read(&net->count) == 0)
+ if (refcount_read(&net->count) == 0)
return NETNSA_NSID_NOT_ASSIGNED;
spin_lock_bh(&net->nsid_lock);
- alloc = atomic_read(&peer->count) == 0 ? false : true;
+ /*
+ * When peer is obtained from RCU lists, we may race with
+ * its cleanup. Check whether it's alive, and this guarantees
+ * we never hash a peer back to net->netns_ids, after it has
+ * just been idr_remove()'d from there in cleanup_net().
+ */
+ if (maybe_get_net(peer))
+ alive = alloc = true;
id = __peernet2id_alloc(net, peer, &alloc);
spin_unlock_bh(&net->nsid_lock);
if (alloc && id >= 0)
rtnl_net_notifyid(net, RTM_NEWNSID, id);
+ if (alive)
+ put_net(peer);
return id;
}
EXPORT_SYMBOL_GPL(peernet2id_alloc);
@@ -264,11 +273,9 @@ struct net *get_net_ns_by_id(struct net *net, int id)
return NULL;
rcu_read_lock();
- spin_lock_bh(&net->nsid_lock);
peer = idr_find(&net->netns_ids, id);
if (peer)
- get_net(peer);
- spin_unlock_bh(&net->nsid_lock);
+ peer = maybe_get_net(peer);
rcu_read_unlock();
return peer;
@@ -284,7 +291,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
int error = 0;
LIST_HEAD(net_exit_list);
- atomic_set(&net->count, 1);
+ refcount_set(&net->count, 1);
refcount_set(&net->passive, 1);
net->dev_base_seq = 1;
net->user_ns = user_ns;
@@ -432,13 +439,40 @@ struct net *copy_net_ns(unsigned long flags,
return net;
}
+static void unhash_nsid(struct net *net, struct net *last)
+{
+ struct net *tmp;
+ /* This function is only called from cleanup_net() work,
+ * and this work is the only process, that may delete
+ * a net from net_namespace_list. So, when the below
+ * is executing, the list may only grow. Thus, we do not
+ * use for_each_net_rcu() or rtnl_lock().
+ */
+ for_each_net(tmp) {
+ int id;
+
+ spin_lock_bh(&tmp->nsid_lock);
+ id = __peernet2id(tmp, net);
+ if (id >= 0)
+ idr_remove(&tmp->netns_ids, id);
+ spin_unlock_bh(&tmp->nsid_lock);
+ if (id >= 0)
+ rtnl_net_notifyid(tmp, RTM_DELNSID, id);
+ if (tmp == last)
+ break;
+ }
+ spin_lock_bh(&net->nsid_lock);
+ idr_destroy(&net->netns_ids);
+ spin_unlock_bh(&net->nsid_lock);
+}
+
static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
static void cleanup_net(struct work_struct *work)
{
const struct pernet_operations *ops;
- struct net *net, *tmp;
+ struct net *net, *tmp, *last;
struct list_head net_kill_list;
LIST_HEAD(net_exit_list);
@@ -451,26 +485,25 @@ static void cleanup_net(struct work_struct *work)
/* Don't let anyone else find us. */
rtnl_lock();
- list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ list_for_each_entry(net, &net_kill_list, cleanup_list)
list_del_rcu(&net->list);
- list_add_tail(&net->exit_list, &net_exit_list);
- for_each_net(tmp) {
- int id;
-
- spin_lock_bh(&tmp->nsid_lock);
- id = __peernet2id(tmp, net);
- if (id >= 0)
- idr_remove(&tmp->netns_ids, id);
- spin_unlock_bh(&tmp->nsid_lock);
- if (id >= 0)
- rtnl_net_notifyid(tmp, RTM_DELNSID, id);
- }
- spin_lock_bh(&net->nsid_lock);
- idr_destroy(&net->netns_ids);
- spin_unlock_bh(&net->nsid_lock);
+ /* Cache last net. After we unlock rtnl, no one new net
+ * added to net_namespace_list can assign nsid pointer
+ * to a net from net_kill_list (see peernet2id_alloc()).
+ * So, we skip them in unhash_nsid().
+ *
+ * Note, that unhash_nsid() does not delete nsid links
+ * between net_kill_list's nets, as they've already
+ * deleted from net_namespace_list. But, this would be
+ * useless anyway, as netns_ids are destroyed there.
+ */
+ last = list_last_entry(&net_namespace_list, struct net, list);
+ rtnl_unlock();
+ list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ unhash_nsid(net, last);
+ list_add_tail(&net->exit_list, &net_exit_list);
}
- rtnl_unlock();
/*
* Another CPU might be rcu-iterating the list, wait for it.
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 1c4810919a0a..b9057478d69c 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -14,7 +14,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
-#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f95a15086225..b8ab5c829511 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -184,25 +184,44 @@
#define func_enter() pr_debug("entering %s\n", __func__);
+#define PKT_FLAGS \
+ pf(IPV6) /* Interface in IPV6 Mode */ \
+ pf(IPSRC_RND) /* IP-Src Random */ \
+ pf(IPDST_RND) /* IP-Dst Random */ \
+ pf(TXSIZE_RND) /* Transmit size is random */ \
+ pf(UDPSRC_RND) /* UDP-Src Random */ \
+ pf(UDPDST_RND) /* UDP-Dst Random */ \
+ pf(UDPCSUM) /* Include UDP checksum */ \
+ pf(NO_TIMESTAMP) /* Don't timestamp packets (default TS) */ \
+ pf(MPLS_RND) /* Random MPLS labels */ \
+ pf(QUEUE_MAP_RND) /* queue map Random */ \
+ pf(QUEUE_MAP_CPU) /* queue map mirrors smp_processor_id() */ \
+ pf(FLOW_SEQ) /* Sequential flows */ \
+ pf(IPSEC) /* ipsec on for flows */ \
+ pf(MACSRC_RND) /* MAC-Src Random */ \
+ pf(MACDST_RND) /* MAC-Dst Random */ \
+ pf(VID_RND) /* Random VLAN ID */ \
+ pf(SVID_RND) /* Random SVLAN ID */ \
+ pf(NODE) /* Node memory alloc*/ \
+
+#define pf(flag) flag##_SHIFT,
+enum pkt_flags {
+ PKT_FLAGS
+};
+#undef pf
+
/* Device flag bits */
-#define F_IPSRC_RND (1<<0) /* IP-Src Random */
-#define F_IPDST_RND (1<<1) /* IP-Dst Random */
-#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */
-#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */
-#define F_MACSRC_RND (1<<4) /* MAC-Src Random */
-#define F_MACDST_RND (1<<5) /* MAC-Dst Random */
-#define F_TXSIZE_RND (1<<6) /* Transmit size is random */
-#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */
-#define F_MPLS_RND (1<<8) /* Random MPLS labels */
-#define F_VID_RND (1<<9) /* Random VLAN ID */
-#define F_SVID_RND (1<<10) /* Random SVLAN ID */
-#define F_FLOW_SEQ (1<<11) /* Sequential flows */
-#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
-#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
-#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
-#define F_NODE (1<<15) /* Node memory alloc*/
-#define F_UDPCSUM (1<<16) /* Include UDP checksum */
-#define F_NO_TIMESTAMP (1<<17) /* Don't timestamp packets (default TS) */
+#define pf(flag) static const __u32 F_##flag = (1<<flag##_SHIFT);
+PKT_FLAGS
+#undef pf
+
+#define pf(flag) __stringify(flag),
+static char *pkt_flag_names[] = {
+ PKT_FLAGS
+};
+#undef pf
+
+#define NR_PKT_FLAGS ARRAY_SIZE(pkt_flag_names)
/* Thread control flag bits */
#define T_STOP (1<<0) /* Stop run */
@@ -399,7 +418,7 @@ struct pktgen_dev {
__u8 ipsmode; /* IPSEC mode (config) */
__u8 ipsproto; /* IPSEC type (config) */
__u32 spi;
- struct dst_entry dst;
+ struct xfrm_dst xdst;
struct dst_ops dstops;
#endif
char result[512];
@@ -523,7 +542,6 @@ static int pgctrl_open(struct inode *inode, struct file *file)
}
static const struct file_operations pktgen_fops = {
- .owner = THIS_MODULE,
.open = pgctrl_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -535,6 +553,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
{
const struct pktgen_dev *pkt_dev = seq->private;
ktime_t stopped;
+ unsigned int i;
u64 idle;
seq_printf(seq,
@@ -596,7 +615,6 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
pkt_dev->src_mac_count, pkt_dev->dst_mac_count);
if (pkt_dev->nr_labels) {
- unsigned int i;
seq_puts(seq, " mpls: ");
for (i = 0; i < pkt_dev->nr_labels; i++)
seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
@@ -632,68 +650,21 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_puts(seq, " Flags: ");
- if (pkt_dev->flags & F_IPV6)
- seq_puts(seq, "IPV6 ");
-
- if (pkt_dev->flags & F_IPSRC_RND)
- seq_puts(seq, "IPSRC_RND ");
-
- if (pkt_dev->flags & F_IPDST_RND)
- seq_puts(seq, "IPDST_RND ");
-
- if (pkt_dev->flags & F_TXSIZE_RND)
- seq_puts(seq, "TXSIZE_RND ");
-
- if (pkt_dev->flags & F_UDPSRC_RND)
- seq_puts(seq, "UDPSRC_RND ");
-
- if (pkt_dev->flags & F_UDPDST_RND)
- seq_puts(seq, "UDPDST_RND ");
-
- if (pkt_dev->flags & F_UDPCSUM)
- seq_puts(seq, "UDPCSUM ");
-
- if (pkt_dev->flags & F_NO_TIMESTAMP)
- seq_puts(seq, "NO_TIMESTAMP ");
-
- if (pkt_dev->flags & F_MPLS_RND)
- seq_puts(seq, "MPLS_RND ");
-
- if (pkt_dev->flags & F_QUEUE_MAP_RND)
- seq_puts(seq, "QUEUE_MAP_RND ");
+ for (i = 0; i < NR_PKT_FLAGS; i++) {
+ if (i == F_FLOW_SEQ)
+ if (!pkt_dev->cflows)
+ continue;
- if (pkt_dev->flags & F_QUEUE_MAP_CPU)
- seq_puts(seq, "QUEUE_MAP_CPU ");
-
- if (pkt_dev->cflows) {
- if (pkt_dev->flags & F_FLOW_SEQ)
- seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/
- else
- seq_puts(seq, "FLOW_RND ");
- }
+ if (pkt_dev->flags & (1 << i))
+ seq_printf(seq, "%s ", pkt_flag_names[i]);
+ else if (i == F_FLOW_SEQ)
+ seq_puts(seq, "FLOW_RND ");
#ifdef CONFIG_XFRM
- if (pkt_dev->flags & F_IPSEC_ON) {
- seq_puts(seq, "IPSEC ");
- if (pkt_dev->spi)
+ if (i == F_IPSEC && pkt_dev->spi)
seq_printf(seq, "spi:%u", pkt_dev->spi);
- }
#endif
-
- if (pkt_dev->flags & F_MACSRC_RND)
- seq_puts(seq, "MACSRC_RND ");
-
- if (pkt_dev->flags & F_MACDST_RND)
- seq_puts(seq, "MACDST_RND ");
-
- if (pkt_dev->flags & F_VID_RND)
- seq_puts(seq, "VID_RND ");
-
- if (pkt_dev->flags & F_SVID_RND)
- seq_puts(seq, "SVID_RND ");
-
- if (pkt_dev->flags & F_NODE)
- seq_puts(seq, "NODE_ALLOC ");
+ }
seq_puts(seq, "\n");
@@ -859,6 +830,35 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
return i;
}
+static __u32 pktgen_read_flag(const char *f, bool *disable)
+{
+ __u32 i;
+
+ if (f[0] == '!') {
+ *disable = true;
+ f++;
+ }
+
+ for (i = 0; i < NR_PKT_FLAGS; i++) {
+ if (!IS_ENABLED(CONFIG_XFRM) && i == IPSEC_SHIFT)
+ continue;
+
+ /* allow only disabling ipv6 flag */
+ if (!*disable && i == IPV6_SHIFT)
+ continue;
+
+ if (strcmp(f, pkt_flag_names[i]) == 0)
+ return 1 << i;
+ }
+
+ if (strcmp(f, "FLOW_RND") == 0) {
+ *disable = !*disable;
+ return F_FLOW_SEQ;
+ }
+
+ return 0;
+}
+
static ssize_t pktgen_if_write(struct file *file,
const char __user * user_buffer, size_t count,
loff_t * offset)
@@ -1216,7 +1216,10 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
if (!strcmp(name, "flag")) {
+ __u32 flag;
char f[32];
+ bool disable = false;
+
memset(f, 0, 32);
len = strn_len(&user_buffer[i], sizeof(f) - 1);
if (len < 0)
@@ -1225,107 +1228,15 @@ static ssize_t pktgen_if_write(struct file *file,
if (copy_from_user(f, &user_buffer[i], len))
return -EFAULT;
i += len;
- if (strcmp(f, "IPSRC_RND") == 0)
- pkt_dev->flags |= F_IPSRC_RND;
-
- else if (strcmp(f, "!IPSRC_RND") == 0)
- pkt_dev->flags &= ~F_IPSRC_RND;
-
- else if (strcmp(f, "TXSIZE_RND") == 0)
- pkt_dev->flags |= F_TXSIZE_RND;
-
- else if (strcmp(f, "!TXSIZE_RND") == 0)
- pkt_dev->flags &= ~F_TXSIZE_RND;
-
- else if (strcmp(f, "IPDST_RND") == 0)
- pkt_dev->flags |= F_IPDST_RND;
-
- else if (strcmp(f, "!IPDST_RND") == 0)
- pkt_dev->flags &= ~F_IPDST_RND;
-
- else if (strcmp(f, "UDPSRC_RND") == 0)
- pkt_dev->flags |= F_UDPSRC_RND;
-
- else if (strcmp(f, "!UDPSRC_RND") == 0)
- pkt_dev->flags &= ~F_UDPSRC_RND;
-
- else if (strcmp(f, "UDPDST_RND") == 0)
- pkt_dev->flags |= F_UDPDST_RND;
-
- else if (strcmp(f, "!UDPDST_RND") == 0)
- pkt_dev->flags &= ~F_UDPDST_RND;
-
- else if (strcmp(f, "MACSRC_RND") == 0)
- pkt_dev->flags |= F_MACSRC_RND;
-
- else if (strcmp(f, "!MACSRC_RND") == 0)
- pkt_dev->flags &= ~F_MACSRC_RND;
-
- else if (strcmp(f, "MACDST_RND") == 0)
- pkt_dev->flags |= F_MACDST_RND;
-
- else if (strcmp(f, "!MACDST_RND") == 0)
- pkt_dev->flags &= ~F_MACDST_RND;
-
- else if (strcmp(f, "MPLS_RND") == 0)
- pkt_dev->flags |= F_MPLS_RND;
-
- else if (strcmp(f, "!MPLS_RND") == 0)
- pkt_dev->flags &= ~F_MPLS_RND;
- else if (strcmp(f, "VID_RND") == 0)
- pkt_dev->flags |= F_VID_RND;
+ flag = pktgen_read_flag(f, &disable);
- else if (strcmp(f, "!VID_RND") == 0)
- pkt_dev->flags &= ~F_VID_RND;
-
- else if (strcmp(f, "SVID_RND") == 0)
- pkt_dev->flags |= F_SVID_RND;
-
- else if (strcmp(f, "!SVID_RND") == 0)
- pkt_dev->flags &= ~F_SVID_RND;
-
- else if (strcmp(f, "FLOW_SEQ") == 0)
- pkt_dev->flags |= F_FLOW_SEQ;
-
- else if (strcmp(f, "QUEUE_MAP_RND") == 0)
- pkt_dev->flags |= F_QUEUE_MAP_RND;
-
- else if (strcmp(f, "!QUEUE_MAP_RND") == 0)
- pkt_dev->flags &= ~F_QUEUE_MAP_RND;
-
- else if (strcmp(f, "QUEUE_MAP_CPU") == 0)
- pkt_dev->flags |= F_QUEUE_MAP_CPU;
-
- else if (strcmp(f, "!QUEUE_MAP_CPU") == 0)
- pkt_dev->flags &= ~F_QUEUE_MAP_CPU;
-#ifdef CONFIG_XFRM
- else if (strcmp(f, "IPSEC") == 0)
- pkt_dev->flags |= F_IPSEC_ON;
-#endif
-
- else if (strcmp(f, "!IPV6") == 0)
- pkt_dev->flags &= ~F_IPV6;
-
- else if (strcmp(f, "NODE_ALLOC") == 0)
- pkt_dev->flags |= F_NODE;
-
- else if (strcmp(f, "!NODE_ALLOC") == 0)
- pkt_dev->flags &= ~F_NODE;
-
- else if (strcmp(f, "UDPCSUM") == 0)
- pkt_dev->flags |= F_UDPCSUM;
-
- else if (strcmp(f, "!UDPCSUM") == 0)
- pkt_dev->flags &= ~F_UDPCSUM;
-
- else if (strcmp(f, "NO_TIMESTAMP") == 0)
- pkt_dev->flags |= F_NO_TIMESTAMP;
-
- else if (strcmp(f, "!NO_TIMESTAMP") == 0)
- pkt_dev->flags &= ~F_NO_TIMESTAMP;
-
- else {
+ if (flag) {
+ if (disable)
+ pkt_dev->flags &= ~flag;
+ else
+ pkt_dev->flags |= flag;
+ } else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
f,
@@ -1804,7 +1715,6 @@ static int pktgen_if_open(struct inode *inode, struct file *file)
}
static const struct file_operations pktgen_if_fops = {
- .owner = THIS_MODULE,
.open = pktgen_if_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1942,7 +1852,6 @@ static int pktgen_thread_open(struct inode *inode, struct file *file)
}
static const struct file_operations pktgen_thread_fops = {
- .owner = THIS_MODULE,
.open = pktgen_thread_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -2544,7 +2453,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev->flows[flow].cur_daddr =
pkt_dev->cur_daddr;
#ifdef CONFIG_XFRM
- if (pkt_dev->flags & F_IPSEC_ON)
+ if (pkt_dev->flags & F_IPSEC)
get_ipsec_sa(pkt_dev, flow);
#endif
pkt_dev->nflows++;
@@ -2609,7 +2518,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
* supports both transport/tunnel mode + ESP/AH type.
*/
if ((x->props.mode == XFRM_MODE_TUNNEL) && (pkt_dev->spi != 0))
- skb->_skb_refdst = (unsigned long)&pkt_dev->dst | SKB_DST_NOREF;
+ skb->_skb_refdst = (unsigned long)&pkt_dev->xdst.u.dst | SKB_DST_NOREF;
rcu_read_lock_bh();
err = x->outer_mode->output(x, skb);
@@ -2649,7 +2558,7 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
static int process_ipsec(struct pktgen_dev *pkt_dev,
struct sk_buff *skb, __be16 protocol)
{
- if (pkt_dev->flags & F_IPSEC_ON) {
+ if (pkt_dev->flags & F_IPSEC) {
struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
int nhead = 0;
if (x) {
@@ -3742,10 +3651,10 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
* performance under such circumstance.
*/
pkt_dev->dstops.family = AF_INET;
- pkt_dev->dst.dev = pkt_dev->odev;
- dst_init_metrics(&pkt_dev->dst, pktgen_dst_metrics, false);
- pkt_dev->dst.child = &pkt_dev->dst;
- pkt_dev->dst.ops = &pkt_dev->dstops;
+ pkt_dev->xdst.u.dst.dev = pkt_dev->odev;
+ dst_init_metrics(&pkt_dev->xdst.u.dst, pktgen_dst_metrics, false);
+ pkt_dev->xdst.child = &pkt_dev->xdst.u.dst;
+ pkt_dev->xdst.u.dst.ops = &pkt_dev->dstops;
#endif
return add_dev_to_thread(t, pkt_dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dabba2a91fc8..56af8e41abfc 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -62,7 +62,9 @@
struct rtnl_link {
rtnl_doit_func doit;
rtnl_dumpit_func dumpit;
+ struct module *owner;
unsigned int flags;
+ struct rcu_head rcu;
};
static DEFINE_MUTEX(rtnl_mutex);
@@ -127,8 +129,7 @@ bool lockdep_rtnl_is_held(void)
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
-static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
-static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1];
+static struct rtnl_link *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
{
@@ -144,72 +145,127 @@ static inline int rtm_msgindex(int msgtype)
return msgindex;
}
-/**
- * __rtnl_register - Register a rtnetlink message type
- * @protocol: Protocol family or PF_UNSPEC
- * @msgtype: rtnetlink message type
- * @doit: Function pointer called for each request message
- * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
- *
- * Registers the specified function pointers (at least one of them has
- * to be non-NULL) to be called whenever a request message for the
- * specified protocol family and message type is received.
- *
- * The special protocol family PF_UNSPEC may be used to define fallback
- * function pointers for the case when no entry for the specific protocol
- * family exists.
- *
- * Returns 0 on success or a negative error code.
- */
-int __rtnl_register(int protocol, int msgtype,
- rtnl_doit_func doit, rtnl_dumpit_func dumpit,
- unsigned int flags)
+static struct rtnl_link *rtnl_get_link(int protocol, int msgtype)
+{
+ struct rtnl_link **tab;
+
+ if (protocol >= ARRAY_SIZE(rtnl_msg_handlers))
+ protocol = PF_UNSPEC;
+
+ tab = rcu_dereference_rtnl(rtnl_msg_handlers[protocol]);
+ if (!tab)
+ tab = rcu_dereference_rtnl(rtnl_msg_handlers[PF_UNSPEC]);
+
+ return tab[msgtype];
+}
+
+static int rtnl_register_internal(struct module *owner,
+ int protocol, int msgtype,
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+ unsigned int flags)
{
- struct rtnl_link *tab;
+ struct rtnl_link *link, *old;
+ struct rtnl_link __rcu **tab;
int msgindex;
+ int ret = -ENOBUFS;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
- tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]);
+ rtnl_lock();
+ tab = rtnl_msg_handlers[protocol];
if (tab == NULL) {
- tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
- if (tab == NULL)
- return -ENOBUFS;
+ tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL);
+ if (!tab)
+ goto unlock;
+ /* ensures we see the 0 stores */
rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
}
+ old = rtnl_dereference(tab[msgindex]);
+ if (old) {
+ link = kmemdup(old, sizeof(*old), GFP_KERNEL);
+ if (!link)
+ goto unlock;
+ } else {
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link)
+ goto unlock;
+ }
+
+ WARN_ON(link->owner && link->owner != owner);
+ link->owner = owner;
+
+ WARN_ON(doit && link->doit && link->doit != doit);
if (doit)
- tab[msgindex].doit = doit;
+ link->doit = doit;
+ WARN_ON(dumpit && link->dumpit && link->dumpit != dumpit);
if (dumpit)
- tab[msgindex].dumpit = dumpit;
- tab[msgindex].flags |= flags;
+ link->dumpit = dumpit;
- return 0;
+ link->flags |= flags;
+
+ /* publish protocol:msgtype */
+ rcu_assign_pointer(tab[msgindex], link);
+ ret = 0;
+ if (old)
+ kfree_rcu(old, rcu);
+unlock:
+ rtnl_unlock();
+ return ret;
}
-EXPORT_SYMBOL_GPL(__rtnl_register);
+
+/**
+ * rtnl_register_module - Register a rtnetlink message type
+ *
+ * @owner: module registering the hook (THIS_MODULE)
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
+ *
+ * Like rtnl_register, but for use by removable modules.
+ */
+int rtnl_register_module(struct module *owner,
+ int protocol, int msgtype,
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+ unsigned int flags)
+{
+ return rtnl_register_internal(owner, protocol, msgtype,
+ doit, dumpit, flags);
+}
+EXPORT_SYMBOL_GPL(rtnl_register_module);
/**
* rtnl_register - Register a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
+ *
+ * Registers the specified function pointers (at least one of them has
+ * to be non-NULL) to be called whenever a request message for the
+ * specified protocol family and message type is received.
*
- * Identical to __rtnl_register() but panics on failure. This is useful
- * as failure of this function is very unlikely, it can only happen due
- * to lack of memory when allocating the chain to store all message
- * handlers for a protocol. Meant for use in init functions where lack
- * of memory implies no sense in continuing.
+ * The special protocol family PF_UNSPEC may be used to define fallback
+ * function pointers for the case when no entry for the specific protocol
+ * family exists.
*/
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit,
unsigned int flags)
{
- if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0)
- panic("Unable to register rtnetlink message handler, "
- "protocol = %d, message type = %d\n",
- protocol, msgtype);
+ int err;
+
+ err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit,
+ flags);
+ if (err)
+ pr_err("Unable to register rtnetlink message handler, "
+ "protocol = %d, message type = %d\n", protocol, msgtype);
}
-EXPORT_SYMBOL_GPL(rtnl_register);
/**
* rtnl_unregister - Unregister a rtnetlink message type
@@ -220,24 +276,25 @@ EXPORT_SYMBOL_GPL(rtnl_register);
*/
int rtnl_unregister(int protocol, int msgtype)
{
- struct rtnl_link *handlers;
+ struct rtnl_link **tab, *link;
int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
- if (!handlers) {
+ tab = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ if (!tab) {
rtnl_unlock();
return -ENOENT;
}
- handlers[msgindex].doit = NULL;
- handlers[msgindex].dumpit = NULL;
- handlers[msgindex].flags = 0;
+ link = tab[msgindex];
+ rcu_assign_pointer(tab[msgindex], NULL);
rtnl_unlock();
+ kfree_rcu(link, rcu);
+
return 0;
}
EXPORT_SYMBOL_GPL(rtnl_unregister);
@@ -251,20 +308,27 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
*/
void rtnl_unregister_all(int protocol)
{
- struct rtnl_link *handlers;
+ struct rtnl_link **tab, *link;
+ int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ tab = rtnl_msg_handlers[protocol];
RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
+ for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) {
+ link = tab[msgindex];
+ if (!link)
+ continue;
+
+ rcu_assign_pointer(tab[msgindex], NULL);
+ kfree_rcu(link, rcu);
+ }
rtnl_unlock();
synchronize_net();
- while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1)
- schedule();
- kfree(handlers);
+ kfree(tab);
}
EXPORT_SYMBOL_GPL(rtnl_unregister_all);
@@ -840,6 +904,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_MULTICAST */
nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_RX_DROPPED */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_TX_DROPPED */
+ nla_total_size_64bit(sizeof(__u64)) +
nla_total_size(sizeof(struct ifla_vf_trust)));
return size;
} else
@@ -920,8 +988,11 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(4) /* IFLA_EVENT */
+ nla_total_size(4) /* IFLA_NEW_NETNSID */
+ + nla_total_size(4) /* IFLA_NEW_IFINDEX */
+ nla_total_size(1) /* IFLA_PROTO_DOWN */
+ nla_total_size(4) /* IFLA_IF_NETNSID */
+ + nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */
+ + nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
+ 0;
}
@@ -1194,7 +1265,11 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
- vf_stats.multicast, IFLA_VF_STATS_PAD)) {
+ vf_stats.multicast, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED,
+ vf_stats.rx_dropped, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED,
+ vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) {
nla_nest_cancel(skb, vfstats);
goto nla_put_vf_failure;
}
@@ -1261,6 +1336,7 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
{
const struct net_device_ops *ops = dev->netdev_ops;
const struct bpf_prog *generic_xdp_prog;
+ struct netdev_bpf xdp;
ASSERT_RTNL();
@@ -1273,7 +1349,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
if (!ops->ndo_bpf)
return XDP_ATTACHED_NONE;
- return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id);
+ __dev_xdp_query(dev, ops->ndo_bpf, &xdp);
+ *prog_id = xdp.prog_id;
+
+ return xdp.prog_attached;
}
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1433,7 +1512,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
struct net_device *dev, struct net *src_net,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask,
- u32 event, int *new_nsid, int tgt_netnsid)
+ u32 event, int *new_nsid, int new_ifindex,
+ int tgt_netnsid)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
@@ -1475,8 +1555,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
- atomic_read(&dev->carrier_changes)) ||
- nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
+ atomic_read(&dev->carrier_up_count) +
+ atomic_read(&dev->carrier_down_count)) ||
+ nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) ||
+ nla_put_u32(skb, IFLA_CARRIER_UP_COUNT,
+ atomic_read(&dev->carrier_up_count)) ||
+ nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT,
+ atomic_read(&dev->carrier_down_count)))
goto nla_put_failure;
if (event != IFLA_EVENT_NONE) {
@@ -1525,6 +1610,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
if (new_nsid &&
nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
goto nla_put_failure;
+ if (new_ifindex &&
+ nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0)
+ goto nla_put_failure;
+
rcu_read_lock();
if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
@@ -1569,6 +1658,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROMISCUITY] = { .type = NLA_U32 },
[IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
+ [IFLA_GSO_MAX_SEGS] = { .type = NLA_U32 },
+ [IFLA_GSO_MAX_SIZE] = { .type = NLA_U32 },
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
@@ -1578,6 +1669,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_EVENT] = { .type = NLA_U32 },
[IFLA_GROUP] = { .type = NLA_U32 },
[IFLA_IF_NETNSID] = { .type = NLA_S32 },
+ [IFLA_CARRIER_UP_COUNT] = { .type = NLA_U32 },
+ [IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1681,18 +1774,18 @@ static bool link_dump_filtered(struct net_device *dev,
return false;
}
-static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+static struct net *get_target_net(struct sock *sk, int netnsid)
{
struct net *net;
- net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+ net = get_net_ns_by_id(sock_net(sk), netnsid);
if (!net)
return ERR_PTR(-EINVAL);
/* For now, the caller is required to have CAP_NET_ADMIN in
* the user namespace owning the target net ns.
*/
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) {
put_net(net);
return ERR_PTR(-EACCES);
}
@@ -1733,7 +1826,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
ifla_policy, NULL) >= 0) {
if (tb[IFLA_IF_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(skb, netnsid);
+ tgt_net = get_target_net(skb->sk, netnsid);
if (IS_ERR(tgt_net)) {
tgt_net = net;
netnsid = -1;
@@ -1766,7 +1859,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
flags,
- ext_filter_mask, 0, NULL,
+ ext_filter_mask, 0, NULL, 0,
netnsid);
if (err < 0) {
@@ -1815,6 +1908,49 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
}
EXPORT_SYMBOL(rtnl_link_get_net);
+/* Figure out which network namespace we are talking about by
+ * examining the link attributes in the following order:
+ *
+ * 1. IFLA_NET_NS_PID
+ * 2. IFLA_NET_NS_FD
+ * 3. IFLA_IF_NETNSID
+ */
+static struct net *rtnl_link_get_net_by_nlattr(struct net *src_net,
+ struct nlattr *tb[])
+{
+ struct net *net;
+
+ if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD])
+ return rtnl_link_get_net(src_net, tb);
+
+ if (!tb[IFLA_IF_NETNSID])
+ return get_net(src_net);
+
+ net = get_net_ns_by_id(src_net, nla_get_u32(tb[IFLA_IF_NETNSID]));
+ if (!net)
+ return ERR_PTR(-EINVAL);
+
+ return net;
+}
+
+static struct net *rtnl_link_get_net_capable(const struct sk_buff *skb,
+ struct net *src_net,
+ struct nlattr *tb[], int cap)
+{
+ struct net *net;
+
+ net = rtnl_link_get_net_by_nlattr(src_net, tb);
+ if (IS_ERR(net))
+ return net;
+
+ if (!netlink_ns_capable(skb, net->user_ns, cap)) {
+ put_net(net);
+ return ERR_PTR(-EPERM);
+ }
+
+ return net;
+}
+
static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
{
if (dev) {
@@ -2077,17 +2213,14 @@ static int do_setlink(const struct sk_buff *skb,
const struct net_device_ops *ops = dev->netdev_ops;
int err;
- if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
- struct net *net = rtnl_link_get_net(dev_net(dev), tb);
+ if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_IF_NETNSID]) {
+ struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev),
+ tb, CAP_NET_ADMIN);
if (IS_ERR(net)) {
err = PTR_ERR(net);
goto errout;
}
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
- put_net(net);
- err = -EPERM;
- goto errout;
- }
+
err = dev_change_net_namespace(dev, net, ifname);
put_net(net);
if (err)
@@ -2204,17 +2337,37 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_TXQLEN]) {
unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]);
- unsigned int orig_len = dev->tx_queue_len;
-
- if (dev->tx_queue_len ^ value) {
- dev->tx_queue_len = value;
- err = call_netdevice_notifiers(
- NETDEV_CHANGE_TX_QUEUE_LEN, dev);
- err = notifier_to_errno(err);
- if (err) {
- dev->tx_queue_len = orig_len;
- goto errout;
- }
+
+ err = dev_change_tx_queue_len(dev, value);
+ if (err)
+ goto errout;
+ status |= DO_SETLINK_MODIFIED;
+ }
+
+ if (tb[IFLA_GSO_MAX_SIZE]) {
+ u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]);
+
+ if (max_size > GSO_MAX_SIZE) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (dev->gso_max_size ^ max_size) {
+ netif_set_gso_max_size(dev, max_size);
+ status |= DO_SETLINK_MODIFIED;
+ }
+ }
+
+ if (tb[IFLA_GSO_MAX_SEGS]) {
+ u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
+
+ if (max_segs > GSO_MAX_SEGS) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (dev->gso_max_segs ^ max_segs) {
+ dev->gso_max_segs = max_segs;
status |= DO_SETLINK_MODIFIED;
}
}
@@ -2400,9 +2553,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
- if (tb[IFLA_IF_NETNSID])
- return -EOPNOTSUPP;
-
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
@@ -2487,36 +2637,53 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct net_device *dev;
+ struct net *tgt_net = net;
+ struct net_device *dev = NULL;
struct ifinfomsg *ifm;
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
int err;
+ int netnsid = -1;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
if (err < 0)
return err;
- if (tb[IFLA_IF_NETNSID])
- return -EOPNOTSUPP;
-
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+ if (tb[IFLA_IF_NETNSID]) {
+ netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+ tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
+ if (IS_ERR(tgt_net))
+ return PTR_ERR(tgt_net);
+ }
+
+ err = -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
- dev = __dev_get_by_index(net, ifm->ifi_index);
+ dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(net, ifname);
+ dev = __dev_get_by_name(tgt_net, ifname);
else if (tb[IFLA_GROUP])
- return rtnl_group_dellink(net, nla_get_u32(tb[IFLA_GROUP]));
+ err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
else
- return -EINVAL;
+ goto out;
- if (!dev)
- return -ENODEV;
+ if (!dev) {
+ if (tb[IFLA_IFNAME] || ifm->ifi_index > 0)
+ err = -ENODEV;
+
+ goto out;
+ }
+
+ err = rtnl_delete_link(dev);
+
+out:
+ if (netnsid >= 0)
+ put_net(tgt_net);
- return rtnl_delete_link(dev);
+ return err;
}
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
@@ -2583,6 +2750,10 @@ struct net_device *rtnl_create_link(struct net *net,
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
if (tb[IFLA_GROUP])
dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+ if (tb[IFLA_GSO_MAX_SIZE])
+ netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE]));
+ if (tb[IFLA_GSO_MAX_SEGS])
+ dev->gso_max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
return dev;
}
@@ -2631,9 +2802,6 @@ replay:
if (err < 0)
return err;
- if (tb[IFLA_IF_NETNSID])
- return -EOPNOTSUPP;
-
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
@@ -2781,14 +2949,10 @@ replay:
name_assign_type = NET_NAME_ENUM;
}
- dest_net = rtnl_link_get_net(net, tb);
+ dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
if (IS_ERR(dest_net))
return PTR_ERR(dest_net);
- err = -EPERM;
- if (!netlink_ns_capable(skb, dest_net->user_ns, CAP_NET_ADMIN))
- goto out;
-
if (tb[IFLA_LINK_NETNSID]) {
int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
@@ -2883,7 +3047,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[IFLA_IF_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(skb, netnsid);
+ tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
if (IS_ERR(tgt_net))
return PTR_ERR(tgt_net);
}
@@ -2915,7 +3079,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
err = rtnl_fill_ifinfo(nskb, dev, net,
RTM_NEWLINK, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, 0, 0, ext_filter_mask,
- 0, NULL, netnsid);
+ 0, NULL, 0, netnsid);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
@@ -2973,18 +3137,26 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = 1;
for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
+ struct rtnl_link **tab;
int type = cb->nlh->nlmsg_type-RTM_BASE;
- struct rtnl_link *handlers;
+ struct rtnl_link *link;
rtnl_dumpit_func dumpit;
if (idx < s_idx || idx == PF_PACKET)
continue;
- handlers = rtnl_dereference(rtnl_msg_handlers[idx]);
- if (!handlers)
+ if (type < 0 || type >= RTM_NR_MSGTYPES)
continue;
- dumpit = READ_ONCE(handlers[type].dumpit);
+ tab = rcu_dereference_rtnl(rtnl_msg_handlers[idx]);
+ if (!tab)
+ continue;
+
+ link = tab[type];
+ if (!link)
+ continue;
+
+ dumpit = link->dumpit;
if (!dumpit)
continue;
@@ -3003,7 +3175,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned int change,
- u32 event, gfp_t flags, int *new_nsid)
+ u32 event, gfp_t flags, int *new_nsid,
+ int new_ifindex)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -3016,7 +3189,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
type, 0, 0, change, 0, 0, event,
- new_nsid, -1);
+ new_nsid, new_ifindex, -1);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -3039,14 +3212,15 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
static void rtmsg_ifinfo_event(int type, struct net_device *dev,
unsigned int change, u32 event,
- gfp_t flags, int *new_nsid)
+ gfp_t flags, int *new_nsid, int new_ifindex)
{
struct sk_buff *skb;
if (dev->reg_state != NETREG_REGISTERED)
return;
- skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid);
+ skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid,
+ new_ifindex);
if (skb)
rtmsg_ifinfo_send(skb, dev, flags);
}
@@ -3054,14 +3228,15 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
gfp_t flags)
{
- rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL);
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
+ NULL, 0);
}
void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
- gfp_t flags, int *new_nsid)
+ gfp_t flags, int *new_nsid, int new_ifindex)
{
rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
- new_nsid);
+ new_nsid, new_ifindex);
}
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
@@ -4314,7 +4489,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct rtnl_link *handlers;
+ struct rtnl_link *link;
+ struct module *owner;
int err = -EOPNOTSUPP;
rtnl_doit_func doit;
unsigned int flags;
@@ -4338,79 +4514,85 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- if (family >= ARRAY_SIZE(rtnl_msg_handlers))
- family = PF_UNSPEC;
-
rcu_read_lock();
- handlers = rcu_dereference(rtnl_msg_handlers[family]);
- if (!handlers) {
- family = PF_UNSPEC;
- handlers = rcu_dereference(rtnl_msg_handlers[family]);
- }
-
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
u16 min_dump_alloc = 0;
- dumpit = READ_ONCE(handlers[type].dumpit);
- if (!dumpit) {
+ link = rtnl_get_link(family, type);
+ if (!link || !link->dumpit) {
family = PF_UNSPEC;
- handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]);
- if (!handlers)
- goto err_unlock;
-
- dumpit = READ_ONCE(handlers[type].dumpit);
- if (!dumpit)
+ link = rtnl_get_link(family, type);
+ if (!link || !link->dumpit)
goto err_unlock;
}
-
- refcount_inc(&rtnl_msg_handlers_ref[family]);
+ owner = link->owner;
+ dumpit = link->dumpit;
if (type == RTM_GETLINK - RTM_BASE)
min_dump_alloc = rtnl_calcit(skb, nlh);
+ err = 0;
+ /* need to do this before rcu_read_unlock() */
+ if (!try_module_get(owner))
+ err = -EPROTONOSUPPORT;
+
rcu_read_unlock();
rtnl = net->rtnl;
- {
+ if (err == 0) {
struct netlink_dump_control c = {
.dump = dumpit,
.min_dump_alloc = min_dump_alloc,
+ .module = owner,
};
err = netlink_dump_start(rtnl, skb, nlh, &c);
+ /* netlink_dump_start() will keep a reference on
+ * module if dump is still in progress.
+ */
+ module_put(owner);
}
- refcount_dec(&rtnl_msg_handlers_ref[family]);
return err;
}
- doit = READ_ONCE(handlers[type].doit);
- if (!doit) {
+ link = rtnl_get_link(family, type);
+ if (!link || !link->doit) {
family = PF_UNSPEC;
- handlers = rcu_dereference(rtnl_msg_handlers[family]);
+ link = rtnl_get_link(PF_UNSPEC, type);
+ if (!link || !link->doit)
+ goto out_unlock;
+ }
+
+ owner = link->owner;
+ if (!try_module_get(owner)) {
+ err = -EPROTONOSUPPORT;
+ goto out_unlock;
}
- flags = READ_ONCE(handlers[type].flags);
+ flags = link->flags;
if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
- refcount_inc(&rtnl_msg_handlers_ref[family]);
- doit = READ_ONCE(handlers[type].doit);
+ doit = link->doit;
rcu_read_unlock();
if (doit)
err = doit(skb, nlh, extack);
- refcount_dec(&rtnl_msg_handlers_ref[family]);
+ module_put(owner);
return err;
}
-
rcu_read_unlock();
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[family]);
- if (handlers) {
- doit = READ_ONCE(handlers[type].doit);
- if (doit)
- err = doit(skb, nlh, extack);
- }
+ link = rtnl_get_link(family, type);
+ if (link && link->doit)
+ err = link->doit(skb, nlh, extack);
rtnl_unlock();
+
+ module_put(owner);
+
+ return err;
+
+out_unlock:
+ rcu_read_unlock();
return err;
err_unlock:
@@ -4454,7 +4636,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_CHANGELOWERSTATE:
case NETDEV_CHANGE_TX_QUEUE_LEN:
rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
- GFP_KERNEL, NULL);
+ GFP_KERNEL, NULL, 0);
break;
default:
break;
@@ -4498,11 +4680,6 @@ static struct pernet_operations rtnetlink_net_ops = {
void __init rtnetlink_init(void)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++)
- refcount_set(&rtnl_msg_handlers_ref[i], 1);
-
if (register_pernet_subsys(&rtnetlink_net_ops))
panic("rtnetlink_init: cannot initialize rtnetlink\n");
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6b0ff396fa9d..8c61c27c1b28 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
int i, new_frags;
u32 d_off;
- if (!num_frags)
- return 0;
-
if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
return -EINVAL;
+ if (!num_frags)
+ goto release;
+
new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < new_frags; i++) {
page = alloc_page(gfp_mask);
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
skb_shinfo(skb)->nr_frags = new_frags;
+release:
skb_zcopy_clear(skb, false);
return 0;
}
@@ -3654,7 +3655,9 @@ normal:
skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
SKBTX_SHARED_FRAG;
- if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
+
+ if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
+ skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
goto err;
while (pos < offset + len) {
@@ -3668,6 +3671,11 @@ normal:
BUG_ON(!nfrags);
+ if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
+ skb_zerocopy_clone(nskb, frag_skb,
+ GFP_ATOMIC))
+ goto err;
+
list_skb = list_skb->next;
}
@@ -3679,9 +3687,6 @@ normal:
goto err;
}
- if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
- goto err;
-
*nskb_frag = *frag;
__skb_frag_ref(nskb_frag);
size = skb_frag_size(nskb_frag);
@@ -4293,7 +4298,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
struct sock *sk = skb->sk;
if (!skb_may_tx_timestamp(sk, false))
- return;
+ goto err;
/* Take a reference to prevent skb_orphan() from freeing the socket,
* but only if the socket refcount is not zero.
@@ -4302,7 +4307,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
*skb_hwtstamps(skb) = *hwtstamps;
__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
sock_put(sk);
+ return;
}
+
+err:
+ kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
@@ -4905,37 +4914,74 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
/**
- * skb_gso_validate_mtu - Return in case such skb fits a given MTU
+ * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
*
- * @skb: GSO skb
- * @mtu: MTU to validate against
+ * There are a couple of instances where we have a GSO skb, and we
+ * want to determine what size it would be after it is segmented.
*
- * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU
- * once split.
+ * We might want to check:
+ * - L3+L4+payload size (e.g. IP forwarding)
+ * - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
+ *
+ * This is a helper to do that correctly considering GSO_BY_FRAGS.
+ *
+ * @seg_len: The segmented length (from skb_gso_*_seglen). In the
+ * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
+ *
+ * @max_len: The maximum permissible length.
+ *
+ * Returns true if the segmented length <= max length.
*/
-bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu)
-{
+static inline bool skb_gso_size_check(const struct sk_buff *skb,
+ unsigned int seg_len,
+ unsigned int max_len) {
const struct skb_shared_info *shinfo = skb_shinfo(skb);
const struct sk_buff *iter;
- unsigned int hlen;
-
- hlen = skb_gso_network_seglen(skb);
if (shinfo->gso_size != GSO_BY_FRAGS)
- return hlen <= mtu;
+ return seg_len <= max_len;
/* Undo this so we can re-use header sizes */
- hlen -= GSO_BY_FRAGS;
+ seg_len -= GSO_BY_FRAGS;
skb_walk_frags(skb, iter) {
- if (hlen + skb_headlen(iter) > mtu)
+ if (seg_len + skb_headlen(iter) > max_len)
return false;
}
return true;
}
+
+/**
+ * skb_gso_validate_mtu - Return in case such skb fits a given MTU
+ *
+ * @skb: GSO skb
+ * @mtu: MTU to validate against
+ *
+ * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU
+ * once split.
+ */
+bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
+}
EXPORT_SYMBOL_GPL(skb_gso_validate_mtu);
+/**
+ * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
+ *
+ * @skb: GSO skb
+ * @len: length to validate against
+ *
+ * skb_gso_validate_mac_len validates if a given skb will fit a wanted
+ * length once split, including L2, L3 and L4 headers and the payload.
+ */
+bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len)
+{
+ return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len);
+}
+EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
+
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
if (skb_cow(skb, skb_headroom(skb)) < 0) {
diff --git a/net/core/sock.c b/net/core/sock.c
index c0b5b2f17412..e50e7b3f2223 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -145,6 +145,8 @@
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
+static void sock_inuse_add(struct net *net, int val);
+
/**
* sk_ns_capable - General socket capability test
* @sk: Socket to use a capability on or through
@@ -1531,8 +1533,11 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sk->sk_kern_sock = kern;
sock_lock_init(sk);
sk->sk_net_refcnt = kern ? 0 : 1;
- if (likely(sk->sk_net_refcnt))
+ if (likely(sk->sk_net_refcnt)) {
get_net(net);
+ sock_inuse_add(net, 1);
+ }
+
sock_net_set(sk, net);
refcount_set(&sk->sk_wmem_alloc, 1);
@@ -1595,6 +1600,9 @@ void sk_destruct(struct sock *sk)
static void __sk_free(struct sock *sk)
{
+ if (likely(sk->sk_net_refcnt))
+ sock_inuse_add(sock_net(sk), -1);
+
if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
sock_diag_broadcast_destroy(sk);
else
@@ -1675,16 +1683,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
-
- /* sk->sk_memcg will be populated at accept() time */
- newsk->sk_memcg = NULL;
-
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
+ mem_cgroup_sk_alloc(newsk);
cgroup_sk_alloc(&newsk->sk_cgrp_data);
rcu_read_lock();
@@ -1716,6 +1721,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
+ if (likely(newsk->sk_net_refcnt))
+ sock_inuse_add(sock_net(newsk), 1);
/*
* Before updating sk_refcnt, we must commit prior changes to memory
@@ -2496,7 +2503,7 @@ int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
}
EXPORT_SYMBOL(sock_no_getname);
-unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
+__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
return 0;
}
@@ -3045,7 +3052,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
- __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
+ __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
@@ -3055,21 +3062,50 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot)
int res = 0;
for_each_possible_cpu(cpu)
- res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
+ res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
+static void sock_inuse_add(struct net *net, int val)
+{
+ this_cpu_add(*net->core.sock_inuse, val);
+}
+
+int sock_inuse_get(struct net *net)
+{
+ int cpu, res = 0;
+
+ for_each_possible_cpu(cpu)
+ res += *per_cpu_ptr(net->core.sock_inuse, cpu);
+
+ return res;
+}
+
+EXPORT_SYMBOL_GPL(sock_inuse_get);
+
static int __net_init sock_inuse_init_net(struct net *net)
{
- net->core.inuse = alloc_percpu(struct prot_inuse);
- return net->core.inuse ? 0 : -ENOMEM;
+ net->core.prot_inuse = alloc_percpu(struct prot_inuse);
+ if (net->core.prot_inuse == NULL)
+ return -ENOMEM;
+
+ net->core.sock_inuse = alloc_percpu(int);
+ if (net->core.sock_inuse == NULL)
+ goto out;
+
+ return 0;
+
+out:
+ free_percpu(net->core.prot_inuse);
+ return -ENOMEM;
}
static void __net_exit sock_inuse_exit_net(struct net *net)
{
- free_percpu(net->core.inuse);
+ free_percpu(net->core.prot_inuse);
+ free_percpu(net->core.sock_inuse);
}
static struct pernet_operations net_inuse_ops = {
@@ -3112,6 +3148,10 @@ static inline void assign_proto_idx(struct proto *prot)
static inline void release_proto_idx(struct proto *prot)
{
}
+
+static void sock_inuse_add(struct net *net, int val)
+{
+}
#endif
static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
@@ -3319,7 +3359,6 @@ static int proto_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations proto_seq_fops = {
- .owner = THIS_MODULE,
.open = proto_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 217f4e3b82f6..146b50e30659 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -288,7 +288,7 @@ static int sock_diag_bind(struct net *net, int group)
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ NETLINK_SOCK_DIAG, AF_INET6);
break;
}
return 0;
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 5eeb1d20cc38..064acb04be0f 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -94,6 +94,16 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
return more_reuse;
}
+static void reuseport_free_rcu(struct rcu_head *head)
+{
+ struct sock_reuseport *reuse;
+
+ reuse = container_of(head, struct sock_reuseport, rcu);
+ if (reuse->prog)
+ bpf_prog_destroy(reuse->prog);
+ kfree(reuse);
+}
+
/**
* reuseport_add_sock - Add a socket to the reuseport group of another.
* @sk: New socket to add to the group.
@@ -102,7 +112,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
*/
int reuseport_add_sock(struct sock *sk, struct sock *sk2)
{
- struct sock_reuseport *reuse;
+ struct sock_reuseport *old_reuse, *reuse;
if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
int err = reuseport_alloc(sk2);
@@ -113,10 +123,13 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2)
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- "socket already in reuseport group");
+ lockdep_is_held(&reuseport_lock));
+ old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ if (old_reuse && old_reuse->num_socks != 1) {
+ spin_unlock_bh(&reuseport_lock);
+ return -EBUSY;
+ }
if (reuse->num_socks == reuse->max_socks) {
reuse = reuseport_grow(reuse);
@@ -134,19 +147,11 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2)
spin_unlock_bh(&reuseport_lock);
+ if (old_reuse)
+ call_rcu(&old_reuse->rcu, reuseport_free_rcu);
return 0;
}
-static void reuseport_free_rcu(struct rcu_head *head)
-{
- struct sock_reuseport *reuse;
-
- reuse = container_of(head, struct sock_reuseport, rcu);
- if (reuse->prog)
- bpf_prog_destroy(reuse->prog);
- kfree(reuse);
-}
-
void reuseport_detach_sock(struct sock *sk)
{
struct sock_reuseport *reuse;
@@ -235,7 +240,9 @@ struct sock *reuseport_select_sock(struct sock *sk,
if (prog && skb)
sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
- else
+
+ /* no bpf or invalid bpf result: fall back to hash usage */
+ if (!sk2)
sk2 = reuse->socks[reciprocal_scale(hash, socks)];
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cbc3dde4cfcc..f2d0462611c3 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -25,6 +25,7 @@
static int zero = 0;
static int one = 1;
+static int two __maybe_unused = 2;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
@@ -250,6 +251,46 @@ static int proc_do_rss_key(struct ctl_table *table, int write,
return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}
+#ifdef CONFIG_BPF_JIT
+static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret, jit_enable = *(int *)table->data;
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ tmp.data = &jit_enable;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) {
+ if (jit_enable < 2 ||
+ (jit_enable == 2 && bpf_dump_raw_ok())) {
+ *(int *)table->data = jit_enable;
+ if (jit_enable == 2)
+ pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
+ } else {
+ ret = -EPERM;
+ }
+ }
+ return ret;
+}
+
+# ifdef CONFIG_HAVE_EBPF_JIT
+static int
+proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+# endif
+#endif
+
static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
{
@@ -325,7 +366,14 @@ static struct ctl_table net_core_table[] = {
.data = &bpf_jit_enable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax_bpf_enable,
+# ifdef CONFIG_BPF_JIT_ALWAYS_ON
+ .extra1 = &one,
+ .extra2 = &one,
+# else
+ .extra1 = &zero,
+ .extra2 = &two,
+# endif
},
# ifdef CONFIG_HAVE_EBPF_JIT
{
@@ -333,14 +381,18 @@ static struct ctl_table net_core_table[] = {
.data = &bpf_jit_harden,
.maxlen = sizeof(int),
.mode = 0600,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax_bpf_restricted,
+ .extra1 = &zero,
+ .extra2 = &two,
},
{
.procname = "bpf_jit_kallsyms",
.data = &bpf_jit_kallsyms,
.maxlen = sizeof(int),
.mode = 0600,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax_bpf_restricted,
+ .extra1 = &zero,
+ .extra2 = &one,
},
# endif
#endif
diff --git a/net/core/xdp.c b/net/core/xdp.c
new file mode 100644
index 000000000000..097a0f74e004
--- /dev/null
+++ b/net/core/xdp.c
@@ -0,0 +1,73 @@
+/* net/core/xdp.c
+ *
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * Released under terms in GPL version 2. See COPYING.
+ */
+#include <linux/types.h>
+#include <linux/mm.h>
+
+#include <net/xdp.h>
+
+#define REG_STATE_NEW 0x0
+#define REG_STATE_REGISTERED 0x1
+#define REG_STATE_UNREGISTERED 0x2
+#define REG_STATE_UNUSED 0x3
+
+void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
+{
+ /* Simplify driver cleanup code paths, allow unreg "unused" */
+ if (xdp_rxq->reg_state == REG_STATE_UNUSED)
+ return;
+
+ WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
+
+ xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
+ xdp_rxq->dev = NULL;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
+
+static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
+{
+ memset(xdp_rxq, 0, sizeof(*xdp_rxq));
+}
+
+/* Returns 0 on success, negative on failure */
+int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index)
+{
+ if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
+ WARN(1, "Driver promised not to register this");
+ return -EINVAL;
+ }
+
+ if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
+ WARN(1, "Missing unregister, handled but fix driver");
+ xdp_rxq_info_unreg(xdp_rxq);
+ }
+
+ if (!dev) {
+ WARN(1, "Missing net_device from driver");
+ return -ENODEV;
+ }
+
+ /* State either UNREGISTERED or NEW */
+ xdp_rxq_info_init(xdp_rxq);
+ xdp_rxq->dev = dev;
+ xdp_rxq->queue_index = queue_index;
+
+ xdp_rxq->reg_state = REG_STATE_REGISTERED;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
+
+void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
+{
+ xdp_rxq->reg_state = REG_STATE_UNUSED;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);
+
+bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
+{
+ return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 8c0ef71bed2f..b270e84d9c13 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -39,23 +39,6 @@ config IP_DCCP_DEBUG
Just say N.
-config NET_DCCPPROBE
- tristate "DCCP connection probing"
- depends on PROC_FS && KPROBES
- ---help---
- This module allows for capturing the changes to DCCP connection
- state in response to incoming packets. It is used for debugging
- DCCP congestion avoidance modules. If you don't understand
- what was just said, you don't need it: say N.
-
- Documentation on how to use DCCP connection probing can be found
- at:
-
- http://www.linuxfoundation.org/collaborate/workgroups/networking/dccpprobe
-
- To compile this code as a module, choose M here: the
- module will be called dccp_probe.
-
endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2e7b56097bc4..5b4ff37bc806 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -21,9 +21,10 @@ obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
dccp_ipv6-y := ipv6.o
obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
-obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
dccp-$(CONFIG_SYSCTL) += sysctl.o
dccp_diag-y := diag.o
-dccp_probe-y := probe.o
+
+# build with local directory for trace.h
+CFLAGS_proto.o := -I$(src)
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 3de0d0362d7f..2a24f7d171a5 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -228,7 +228,7 @@ static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
}
if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
- DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n");
+ DCCP_CRIT("Ack Vector buffer overflow: dropping old entries");
av->av_overflow = true;
}
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 1c75cd1255f6..92d016e87816 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(struct timer_list *t)
ccid2_pr_debug("RTO_EXPIRE\n");
+ if (sk->sk_state == DCCP_CLOSED)
+ goto out;
+
/* back-off timer */
hc->tx_rto <<= 1;
if (hc->tx_rto > DCCP_RTO_MAX)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 0c55ffb859bf..f91e3816806b 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -316,7 +316,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
void dccp_shutdown(struct sock *sk, int how);
int inet_dccp_listen(struct socket *sock, int backlog);
-unsigned int dccp_poll(struct file *file, struct socket *sock,
+__poll_t dccp_poll(struct file *file, struct socket *sock,
poll_table *wait);
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
void dccp_req_err(struct sock *sk, u64 seq);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index abd07a443219..37ccbe62eb1a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -57,10 +57,17 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
if (state == DCCP_TIME_WAIT)
timeo = DCCP_TIMEWAIT_LEN;
+ /* tw_timer is pinned, so we need to make sure BH are disabled
+ * in following section, otherwise timer handler could run before
+ * we complete the initialization.
+ */
+ local_bh_disable();
inet_twsk_schedule(tw, timeo);
- /* Linkage updates. */
- __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
- inet_twsk_put(tw);
+ /* Linkage updates.
+ * Note that access to tw after this point is illegal.
+ */
+ inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
+ local_bh_enable();
} else {
/* Sorry, if we're out of memory, just CLOSE this
* socket up. We've got bigger problems than
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
deleted file mode 100644
index 3d3fda05b32d..000000000000
--- a/net/dccp/probe.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * dccp_probe - Observe the DCCP flow with kprobes.
- *
- * The idea for this came from Werner Almesberger's umlsim
- * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
- *
- * Modified for DCCP from Stephen Hemminger's code
- * Copyright (C) 2006, Ian McDonald <ian.mcdonald@jandi.co.nz>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/kprobes.h>
-#include <linux/socket.h>
-#include <linux/dccp.h>
-#include <linux/proc_fs.h>
-#include <linux/module.h>
-#include <linux/kfifo.h>
-#include <linux/vmalloc.h>
-#include <linux/time64.h>
-#include <linux/gfp.h>
-#include <net/net_namespace.h>
-
-#include "dccp.h"
-#include "ccid.h"
-#include "ccids/ccid3.h"
-
-static int port;
-
-static int bufsize = 64 * 1024;
-
-static const char procname[] = "dccpprobe";
-
-static struct {
- struct kfifo fifo;
- spinlock_t lock;
- wait_queue_head_t wait;
- struct timespec64 tstart;
-} dccpw;
-
-static void printl(const char *fmt, ...)
-{
- va_list args;
- int len;
- struct timespec64 now;
- char tbuf[256];
-
- va_start(args, fmt);
- getnstimeofday64(&now);
-
- now = timespec64_sub(now, dccpw.tstart);
-
- len = sprintf(tbuf, "%lu.%06lu ",
- (unsigned long) now.tv_sec,
- (unsigned long) now.tv_nsec / NSEC_PER_USEC);
- len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
- va_end(args);
-
- kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
- wake_up(&dccpw.wait);
-}
-
-static int jdccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
-{
- const struct inet_sock *inet = inet_sk(sk);
- struct ccid3_hc_tx_sock *hc = NULL;
-
- if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
- hc = ccid3_hc_tx_sk(sk);
-
- if (port == 0 || ntohs(inet->inet_dport) == port ||
- ntohs(inet->inet_sport) == port) {
- if (hc)
- printl("%pI4:%u %pI4:%u %d %d %d %d %u %llu %llu %d\n",
- &inet->inet_saddr, ntohs(inet->inet_sport),
- &inet->inet_daddr, ntohs(inet->inet_dport), size,
- hc->tx_s, hc->tx_rtt, hc->tx_p,
- hc->tx_x_calc, hc->tx_x_recv >> 6,
- hc->tx_x >> 6, hc->tx_t_ipi);
- else
- printl("%pI4:%u %pI4:%u %d\n",
- &inet->inet_saddr, ntohs(inet->inet_sport),
- &inet->inet_daddr, ntohs(inet->inet_dport),
- size);
- }
-
- jprobe_return();
- return 0;
-}
-
-static struct jprobe dccp_send_probe = {
- .kp = {
- .symbol_name = "dccp_sendmsg",
- },
- .entry = jdccp_sendmsg,
-};
-
-static int dccpprobe_open(struct inode *inode, struct file *file)
-{
- kfifo_reset(&dccpw.fifo);
- getnstimeofday64(&dccpw.tstart);
- return 0;
-}
-
-static ssize_t dccpprobe_read(struct file *file, char __user *buf,
- size_t len, loff_t *ppos)
-{
- int error = 0, cnt = 0;
- unsigned char *tbuf;
-
- if (!buf)
- return -EINVAL;
-
- if (len == 0)
- return 0;
-
- tbuf = vmalloc(len);
- if (!tbuf)
- return -ENOMEM;
-
- error = wait_event_interruptible(dccpw.wait,
- kfifo_len(&dccpw.fifo) != 0);
- if (error)
- goto out_free;
-
- cnt = kfifo_out_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
- error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
-
-out_free:
- vfree(tbuf);
-
- return error ? error : cnt;
-}
-
-static const struct file_operations dccpprobe_fops = {
- .owner = THIS_MODULE,
- .open = dccpprobe_open,
- .read = dccpprobe_read,
- .llseek = noop_llseek,
-};
-
-static __init int dccpprobe_init(void)
-{
- int ret = -ENOMEM;
-
- init_waitqueue_head(&dccpw.wait);
- spin_lock_init(&dccpw.lock);
- if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL))
- return ret;
- if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops))
- goto err0;
-
- ret = register_jprobe(&dccp_send_probe);
- if (ret) {
- ret = request_module("dccp");
- if (!ret)
- ret = register_jprobe(&dccp_send_probe);
- }
-
- if (ret)
- goto err1;
-
- pr_info("DCCP watch registered (port=%d)\n", port);
- return 0;
-err1:
- remove_proc_entry(procname, init_net.proc_net);
-err0:
- kfifo_free(&dccpw.fifo);
- return ret;
-}
-module_init(dccpprobe_init);
-
-static __exit void dccpprobe_exit(void)
-{
- kfifo_free(&dccpw.fifo);
- remove_proc_entry(procname, init_net.proc_net);
- unregister_jprobe(&dccp_send_probe);
-
-}
-module_exit(dccpprobe_exit);
-
-MODULE_PARM_DESC(port, "Port to match (0=all)");
-module_param(port, int, 0);
-
-MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
-module_param(bufsize, int, 0);
-
-MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>");
-MODULE_DESCRIPTION("DCCP snooper");
-MODULE_LICENSE("GPL");
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b68168fcc06a..74685fecfdb9 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -38,6 +38,9 @@
#include "dccp.h"
#include "feat.h"
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
EXPORT_SYMBOL_GPL(dccp_statistics);
@@ -110,7 +113,7 @@ void dccp_set_state(struct sock *sk, const int state)
/* Change state AFTER socket is unhashed to avoid closed
* socket sitting in hash tables.
*/
- sk->sk_state = state;
+ inet_sk_set_state(sk, state);
}
EXPORT_SYMBOL_GPL(dccp_set_state);
@@ -259,6 +262,7 @@ int dccp_disconnect(struct sock *sk, int flags)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
+ struct dccp_sock *dp = dccp_sk(sk);
int err = 0;
const int old_state = sk->sk_state;
@@ -278,6 +282,10 @@ int dccp_disconnect(struct sock *sk, int flags)
sk->sk_err = ECONNRESET;
dccp_clear_xmit_timers(sk);
+ ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ dp->dccps_hc_rx_ccid = NULL;
+ dp->dccps_hc_tx_ccid = NULL;
__skb_queue_purge(&sk->sk_receive_queue);
__skb_queue_purge(&sk->sk_write_queue);
@@ -313,10 +321,10 @@ EXPORT_SYMBOL_GPL(dccp_disconnect);
* take care of normal races (between the test and the event) and we don't
* go look at any of the socket buffers directly.
*/
-unsigned int dccp_poll(struct file *file, struct socket *sock,
+__poll_t dccp_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
- unsigned int mask;
+ __poll_t mask;
struct sock *sk = sock->sk;
sock_poll_wait(file, sk_sleep(sk), wait);
@@ -756,6 +764,8 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int rc, size;
long timeo;
+ trace_dccp_probe(sk, len);
+
if (len > dp->dccps_mss_cache)
return -EMSGSIZE;
diff --git a/net/dccp/trace.h b/net/dccp/trace.h
new file mode 100644
index 000000000000..5062421beee9
--- /dev/null
+++ b/net/dccp/trace.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM dccp
+
+#if !defined(_TRACE_DCCP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DCCP_H
+
+#include <net/sock.h>
+#include "dccp.h"
+#include "ccids/ccid3.h"
+#include <linux/tracepoint.h>
+#include <trace/events/net_probe_common.h>
+
+TRACE_EVENT(dccp_probe,
+
+ TP_PROTO(struct sock *sk, size_t size),
+
+ TP_ARGS(sk, size),
+
+ TP_STRUCT__entry(
+ /* sockaddr_in6 is always bigger than sockaddr_in */
+ __array(__u8, saddr, sizeof(struct sockaddr_in6))
+ __array(__u8, daddr, sizeof(struct sockaddr_in6))
+ __field(__u16, sport)
+ __field(__u16, dport)
+ __field(__u16, size)
+ __field(__u16, tx_s)
+ __field(__u32, tx_rtt)
+ __field(__u32, tx_p)
+ __field(__u32, tx_x_calc)
+ __field(__u64, tx_x_recv)
+ __field(__u64, tx_x)
+ __field(__u32, tx_t_ipi)
+ ),
+
+ TP_fast_assign(
+ const struct inet_sock *inet = inet_sk(sk);
+ struct ccid3_hc_tx_sock *hc = NULL;
+
+ if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
+ hc = ccid3_hc_tx_sk(sk);
+
+ memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+ memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+ TP_STORE_ADDR_PORTS(__entry, inet, sk);
+
+ /* For filtering use */
+ __entry->sport = ntohs(inet->inet_sport);
+ __entry->dport = ntohs(inet->inet_dport);
+
+ __entry->size = size;
+ if (hc) {
+ __entry->tx_s = hc->tx_s;
+ __entry->tx_rtt = hc->tx_rtt;
+ __entry->tx_p = hc->tx_p;
+ __entry->tx_x_calc = hc->tx_x_calc;
+ __entry->tx_x_recv = hc->tx_x_recv >> 6;
+ __entry->tx_x = hc->tx_x >> 6;
+ __entry->tx_t_ipi = hc->tx_t_ipi;
+ } else {
+ __entry->tx_s = 0;
+ memset(&__entry->tx_rtt, 0, (void *)&__entry->tx_t_ipi -
+ (void *)&__entry->tx_rtt +
+ sizeof(__entry->tx_t_ipi));
+ }
+ ),
+
+ TP_printk("src=%pISpc dest=%pISpc size=%d tx_s=%d tx_rtt=%d "
+ "tx_p=%d tx_x_calc=%u tx_x_recv=%llu tx_x=%llu tx_t_ipi=%d",
+ __entry->saddr, __entry->daddr, __entry->size,
+ __entry->tx_s, __entry->tx_rtt, __entry->tx_p,
+ __entry->tx_x_calc, __entry->tx_x_recv, __entry->tx_x,
+ __entry->tx_t_ipi)
+);
+
+#endif /* _TRACE_TCP_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 518cea17b811..cc1b505453a8 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1209,11 +1209,11 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
}
-static unsigned int dn_poll(struct file *file, struct socket *sock, poll_table *wait)
+static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
- int mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll(file, sock, wait);
if (!skb_queue_empty(&scp->other_receive_queue))
mask |= POLLRDBAND;
@@ -2320,7 +2320,6 @@ static int dn_socket_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations dn_socket_seq_fops = {
- .owner = THIS_MODULE,
.open = dn_socket_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 9153247dad28..c9f5e1ebb9c8 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1389,7 +1389,6 @@ static int dn_dev_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations dn_dev_seq_fops = {
- .owner = THIS_MODULE,
.open = dn_dev_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1418,9 +1417,12 @@ void __init dn_dev_init(void)
dn_dev_devices_on();
- rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, 0);
- rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, 0);
- rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWADDR,
+ dn_nl_newaddr, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELADDR,
+ dn_nl_deladdr, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
+ NULL, dn_nl_dump_ifaddr, 0);
proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops);
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index b37a1b833c77..fce94cbd4378 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -792,8 +792,10 @@ void __init dn_fib_init(void)
register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
- rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, 0);
- rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWROUTE,
+ dn_fib_rtm_newroute, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELROUTE,
+ dn_fib_rtm_delroute, NULL, 0);
}
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 528119a5618e..6e37d9e6345e 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -597,7 +597,6 @@ static int dn_neigh_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations dn_neigh_seq_fops = {
- .owner = THIS_MODULE,
.open = dn_neigh_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 324cb9f2f551..ef20b8e31669 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -199,11 +199,11 @@ static void dn_dst_check_expire(struct timer_list *unused)
lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
if (atomic_read(&rt->dst.__refcnt) > 1 ||
(now - rt->dst.lastuse) < expire) {
- rtp = &rt->dst.dn_next;
+ rtp = &rt->dn_next;
continue;
}
- *rtp = rt->dst.dn_next;
- rt->dst.dn_next = NULL;
+ *rtp = rt->dn_next;
+ rt->dn_next = NULL;
dst_dev_put(&rt->dst);
dst_release(&rt->dst);
}
@@ -233,11 +233,11 @@ static int dn_dst_gc(struct dst_ops *ops)
lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
if (atomic_read(&rt->dst.__refcnt) > 1 ||
(now - rt->dst.lastuse) < expire) {
- rtp = &rt->dst.dn_next;
+ rtp = &rt->dn_next;
continue;
}
- *rtp = rt->dst.dn_next;
- rt->dst.dn_next = NULL;
+ *rtp = rt->dn_next;
+ rt->dn_next = NULL;
dst_dev_put(&rt->dst);
dst_release(&rt->dst);
break;
@@ -333,8 +333,8 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
if (compare_keys(&rth->fld, &rt->fld)) {
/* Put it first */
- *rthp = rth->dst.dn_next;
- rcu_assign_pointer(rth->dst.dn_next,
+ *rthp = rth->dn_next;
+ rcu_assign_pointer(rth->dn_next,
dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
@@ -345,10 +345,10 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
*rp = rth;
return 0;
}
- rthp = &rth->dst.dn_next;
+ rthp = &rth->dn_next;
}
- rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
+ rcu_assign_pointer(rt->dn_next, dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
dst_hold_and_use(&rt->dst, now);
@@ -369,8 +369,8 @@ static void dn_run_flush(struct timer_list *unused)
goto nothing_to_declare;
for(; rt; rt = next) {
- next = rcu_dereference_raw(rt->dst.dn_next);
- RCU_INIT_POINTER(rt->dst.dn_next, NULL);
+ next = rcu_dereference_raw(rt->dn_next);
+ RCU_INIT_POINTER(rt->dn_next, NULL);
dst_dev_put(&rt->dst);
dst_release(&rt->dst);
}
@@ -1183,6 +1183,7 @@ make_route:
if (rt == NULL)
goto e_nobufs;
+ rt->dn_next = NULL;
memset(&rt->fld, 0, sizeof(rt->fld));
rt->fld.saddr = oldflp->saddr;
rt->fld.daddr = oldflp->daddr;
@@ -1252,7 +1253,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *
if (!(flags & MSG_TRYHARD)) {
rcu_read_lock_bh();
for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
- rt = rcu_dereference_bh(rt->dst.dn_next)) {
+ rt = rcu_dereference_bh(rt->dn_next)) {
if ((flp->daddr == rt->fld.daddr) &&
(flp->saddr == rt->fld.saddr) &&
(flp->flowidn_mark == rt->fld.flowidn_mark) &&
@@ -1448,6 +1449,7 @@ make_route:
if (rt == NULL)
goto e_nobufs;
+ rt->dn_next = NULL;
memset(&rt->fld, 0, sizeof(rt->fld));
rt->rt_saddr = fld.saddr;
rt->rt_daddr = fld.daddr;
@@ -1529,7 +1531,7 @@ static int dn_route_input(struct sk_buff *skb)
rcu_read_lock();
for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
- rt = rcu_dereference(rt->dst.dn_next)) {
+ rt = rcu_dereference(rt->dn_next)) {
if ((rt->fld.saddr == cb->src) &&
(rt->fld.daddr == cb->dst) &&
(rt->fld.flowidn_oif == 0) &&
@@ -1749,7 +1751,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock_bh();
for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
rt;
- rt = rcu_dereference_bh(rt->dst.dn_next), idx++) {
+ rt = rcu_dereference_bh(rt->dn_next), idx++) {
if (idx < s_idx)
continue;
skb_dst_set(skb, dst_clone(&rt->dst));
@@ -1795,7 +1797,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
{
struct dn_rt_cache_iter_state *s = seq->private;
- rt = rcu_dereference_bh(rt->dst.dn_next);
+ rt = rcu_dereference_bh(rt->dn_next);
while (!rt) {
rcu_read_unlock_bh();
if (--s->bucket < 0)
@@ -1858,7 +1860,6 @@ static int dn_rt_cache_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations dn_rt_cache_seq_fops = {
- .owner = THIS_MODULE,
.open = dn_rt_cache_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1921,11 +1922,11 @@ void __init dn_route_init(void)
&dn_rt_cache_seq_fops);
#ifdef CONFIG_DECNET_ROUTER
- rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
- dn_fib_dump, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
+ dn_cache_getroute, dn_fib_dump, 0);
#else
- rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
- dn_cache_dump, 0);
+ rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
+ dn_cache_getroute, dn_cache_dump, 0);
#endif
}
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 03c3bdf25468..bbf2c82cf7b2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -16,6 +16,15 @@ config NET_DSA
if NET_DSA
+config NET_DSA_LEGACY
+ bool "Support for older platform device and Device Tree registration"
+ default y
+ ---help---
+ Say Y if you want to enable support for the older platform device and
+ deprecated Device Tree binding registration.
+
+ This feature is scheduled for removal in 4.17.
+
# tagging formats
config NET_DSA_TAG_BRCM
bool
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 0e13c1f95d13..9e4d3536f977 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o
+dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o
+dsa_core-$(CONFIG_NET_DSA_LEGACY) += legacy.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 44e3fb7dec8c..adf50fbc4c13 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -51,9 +51,7 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index)
INIT_LIST_HEAD(&dst->list);
list_add_tail(&dsa_tree_list, &dst->list);
- /* Initialize the reference counter to the number of switches, not 1 */
kref_init(&dst->refcount);
- refcount_set(&dst->refcount.refcount, 0);
return dst;
}
@@ -64,20 +62,23 @@ static void dsa_tree_free(struct dsa_switch_tree *dst)
kfree(dst);
}
-static struct dsa_switch_tree *dsa_tree_touch(int index)
+static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst)
{
- struct dsa_switch_tree *dst;
-
- dst = dsa_tree_find(index);
- if (!dst)
- dst = dsa_tree_alloc(index);
+ if (dst)
+ kref_get(&dst->refcount);
return dst;
}
-static void dsa_tree_get(struct dsa_switch_tree *dst)
+static struct dsa_switch_tree *dsa_tree_touch(int index)
{
- kref_get(&dst->refcount);
+ struct dsa_switch_tree *dst;
+
+ dst = dsa_tree_find(index);
+ if (dst)
+ return dsa_tree_get(dst);
+ else
+ return dsa_tree_alloc(index);
}
static void dsa_tree_release(struct kref *ref)
@@ -91,7 +92,8 @@ static void dsa_tree_release(struct kref *ref)
static void dsa_tree_put(struct dsa_switch_tree *dst)
{
- kref_put(&dst->refcount, dsa_tree_release);
+ if (dst)
+ kref_put(&dst->refcount, dsa_tree_release);
}
static bool dsa_port_is_dsa(struct dsa_port *port)
@@ -239,7 +241,7 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
for (port = 0; port < ds->num_ports; port++) {
dp = &ds->ports[port];
- if (dsa_port_is_user(dp))
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
dp->cpu_dp = dst->cpu_dp;
}
}
@@ -269,13 +271,12 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_CPU:
case DSA_PORT_TYPE_DSA:
- err = dsa_port_fixed_link_register_of(dp);
+ err = dsa_port_link_register_of(dp);
if (err) {
- dev_err(ds->dev, "failed to register fixed link for port %d.%d\n",
+ dev_err(ds->dev, "failed to setup link for port %d.%d\n",
ds->index, dp->index);
return err;
}
-
break;
case DSA_PORT_TYPE_USER:
err = dsa_slave_create(dp);
@@ -299,7 +300,7 @@ static void dsa_port_teardown(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_CPU:
case DSA_PORT_TYPE_DSA:
- dsa_port_fixed_link_unregister_of(dp);
+ dsa_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_USER:
if (dp->slave) {
@@ -765,6 +766,7 @@ int dsa_register_switch(struct dsa_switch *ds)
mutex_lock(&dsa2_mutex);
err = dsa_switch_probe(ds);
+ dsa_tree_put(ds->dst);
mutex_unlock(&dsa2_mutex);
return err;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 7d036696e8c4..70de7895e5b8 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -97,8 +97,17 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
bool dsa_schedule_work(struct work_struct *work);
/* legacy.c */
+#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
int dsa_legacy_register(void);
void dsa_legacy_unregister(void);
+#else
+static inline int dsa_legacy_register(void)
+{
+ return 0;
+}
+
+static inline void dsa_legacy_unregister(void) { }
+#endif
int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid,
@@ -157,8 +166,8 @@ int dsa_port_vlan_add(struct dsa_port *dp,
struct switchdev_trans *trans);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
-int dsa_port_fixed_link_register_of(struct dsa_port *dp);
-void dsa_port_fixed_link_unregister_of(struct dsa_port *dp);
+int dsa_port_link_register_of(struct dsa_port *dp);
+void dsa_port_link_unregister_of(struct dsa_port *dp);
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 84611d7fcfa2..cb54b81d0bd9 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- ret = dsa_port_fixed_link_register_of(&ds->ports[port]);
+ ret = dsa_port_link_register_of(&ds->ports[port]);
if (ret)
return ret;
}
@@ -275,7 +275,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
for (port = 0; port < ds->num_ports; port++) {
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- dsa_port_fixed_link_unregister_of(&ds->ports[port]);
+ dsa_port_link_unregister_of(&ds->ports[port]);
}
if (ds->slave_mii_bus && ds->ops->phy_read)
@@ -718,26 +718,6 @@ static int dsa_resume(struct device *d)
}
#endif
-/* legacy way, bypassing the bridge *****************************************/
-int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid,
- u16 flags)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- return dsa_port_fdb_add(dp, addr, vid);
-}
-
-int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- return dsa_port_fdb_del(dp, addr, vid);
-}
-
static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
static const struct of_device_id dsa_of_match_table[] = {
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bb4be2679904..7acc1169d75e 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -273,7 +273,56 @@ int dsa_port_vlan_del(struct dsa_port *dp,
return 0;
}
-int dsa_port_fixed_link_register_of(struct dsa_port *dp)
+static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
+{
+ struct device_node *port_dn = dp->dn;
+ struct device_node *phy_dn;
+ struct dsa_switch *ds = dp->ds;
+ struct phy_device *phydev;
+ int port = dp->index;
+ int err = 0;
+
+ phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
+ if (!phy_dn)
+ return 0;
+
+ phydev = of_phy_find_device(phy_dn);
+ if (!phydev) {
+ err = -EPROBE_DEFER;
+ goto err_put_of;
+ }
+
+ if (enable) {
+ err = genphy_config_init(phydev);
+ if (err < 0)
+ goto err_put_dev;
+
+ err = genphy_resume(phydev);
+ if (err < 0)
+ goto err_put_dev;
+
+ err = genphy_read_status(phydev);
+ if (err < 0)
+ goto err_put_dev;
+ } else {
+ err = genphy_suspend(phydev);
+ if (err < 0)
+ goto err_put_dev;
+ }
+
+ if (ds->ops->adjust_link)
+ ds->ops->adjust_link(ds, port, phydev);
+
+ dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev));
+
+err_put_dev:
+ put_device(&phydev->mdio.dev);
+err_put_of:
+ of_node_put(phy_dn);
+ return err;
+}
+
+static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
{
struct device_node *dn = dp->dn;
struct dsa_switch *ds = dp->ds;
@@ -282,38 +331,44 @@ int dsa_port_fixed_link_register_of(struct dsa_port *dp)
int mode;
int err;
- if (of_phy_is_fixed_link(dn)) {
- err = of_phy_register_fixed_link(dn);
- if (err) {
- dev_err(ds->dev,
- "failed to register the fixed PHY of port %d\n",
- port);
- return err;
- }
+ err = of_phy_register_fixed_link(dn);
+ if (err) {
+ dev_err(ds->dev,
+ "failed to register the fixed PHY of port %d\n",
+ port);
+ return err;
+ }
- phydev = of_phy_find_device(dn);
+ phydev = of_phy_find_device(dn);
- mode = of_get_phy_mode(dn);
- if (mode < 0)
- mode = PHY_INTERFACE_MODE_NA;
- phydev->interface = mode;
+ mode = of_get_phy_mode(dn);
+ if (mode < 0)
+ mode = PHY_INTERFACE_MODE_NA;
+ phydev->interface = mode;
- genphy_config_init(phydev);
- genphy_read_status(phydev);
+ genphy_config_init(phydev);
+ genphy_read_status(phydev);
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
+ if (ds->ops->adjust_link)
+ ds->ops->adjust_link(ds, port, phydev);
- put_device(&phydev->mdio.dev);
- }
+ put_device(&phydev->mdio.dev);
return 0;
}
-void dsa_port_fixed_link_unregister_of(struct dsa_port *dp)
+int dsa_port_link_register_of(struct dsa_port *dp)
{
- struct device_node *dn = dp->dn;
+ if (of_phy_is_fixed_link(dp->dn))
+ return dsa_port_fixed_link_register_of(dp);
+ else
+ return dsa_port_setup_phy_of(dp, true);
+}
- if (of_phy_is_fixed_link(dn))
- of_phy_deregister_fixed_link(dn);
+void dsa_port_link_unregister_of(struct dsa_port *dp)
+{
+ if (of_phy_is_fixed_link(dp->dn))
+ of_phy_deregister_fixed_link(dp->dn);
+ else
+ dsa_port_setup_phy_of(dp, false);
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d6e7a642493b..f52307296de4 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -16,7 +16,6 @@
#include <linux/of_net.h>
#include <linux/of_mdio.h>
#include <linux/mdio.h>
-#include <linux/list.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mirred.h>
@@ -709,14 +708,12 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
__be16 protocol = cls->common.protocol;
- struct net *net = dev_net(dev);
struct dsa_switch *ds = dp->ds;
struct net_device *to_dev;
const struct tc_action *a;
struct dsa_port *to_dp;
int err = -EOPNOTSUPP;
LIST_HEAD(actions);
- int ifindex;
if (!ds->ops->port_mirror_add)
return err;
@@ -730,8 +727,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) {
struct dsa_mall_mirror_tc_entry *mirror;
- ifindex = tcf_mirred_ifindex(a);
- to_dev = __dev_get_by_index(net, ifindex);
+ to_dev = tcf_mirred_dev(a);
if (!to_dev)
return -EINVAL;
@@ -944,6 +940,26 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.set_rxnfc = dsa_slave_set_rxnfc,
};
+/* legacy way, bypassing the bridge *****************************************/
+int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid,
+ u16 flags)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ return dsa_port_fdb_add(dp, addr, vid);
+}
+
+int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ return dsa_port_fdb_del(dp, addr, vid);
+}
+
static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_open = dsa_slave_open,
.ndo_stop = dsa_slave_close,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 29608d087a7c..b93511726069 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -83,29 +83,52 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
static int dsa_switch_fdb_add(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
- /* Do not care yet about other switch chips of the fabric */
- if (ds->index != info->sw_index)
- return 0;
+ int port = dsa_towards_port(ds, info->sw_index, info->port);
if (!ds->ops->port_fdb_add)
return -EOPNOTSUPP;
- return ds->ops->port_fdb_add(ds, info->port, info->addr,
- info->vid);
+ return ds->ops->port_fdb_add(ds, port, info->addr, info->vid);
}
static int dsa_switch_fdb_del(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
- /* Do not care yet about other switch chips of the fabric */
- if (ds->index != info->sw_index)
- return 0;
+ int port = dsa_towards_port(ds, info->sw_index, info->port);
if (!ds->ops->port_fdb_del)
return -EOPNOTSUPP;
- return ds->ops->port_fdb_del(ds, info->port, info->addr,
- info->vid);
+ return ds->ops->port_fdb_del(ds, port, info->addr, info->vid);
+}
+
+static int
+dsa_switch_mdb_prepare_bitmap(struct dsa_switch *ds,
+ const struct switchdev_obj_port_mdb *mdb,
+ const unsigned long *bitmap)
+{
+ int port, err;
+
+ if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
+ return -EOPNOTSUPP;
+
+ for_each_set_bit(port, bitmap, ds->num_ports) {
+ err = ds->ops->port_mdb_prepare(ds, port, mdb);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds,
+ const struct switchdev_obj_port_mdb *mdb,
+ const unsigned long *bitmap)
+{
+ int port;
+
+ for_each_set_bit(port, bitmap, ds->num_ports)
+ ds->ops->port_mdb_add(ds, port, mdb);
}
static int dsa_switch_mdb_add(struct dsa_switch *ds,
@@ -114,7 +137,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
const struct switchdev_obj_port_mdb *mdb = info->mdb;
struct switchdev_trans *trans = info->trans;
DECLARE_BITMAP(group, ds->num_ports);
- int port, err;
+ int port;
/* Build a mask of Multicast group members */
bitmap_zero(group, ds->num_ports);
@@ -124,21 +147,10 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
if (dsa_is_dsa_port(ds, port))
set_bit(port, group);
- if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
- return -EOPNOTSUPP;
-
- for_each_set_bit(port, group, ds->num_ports) {
- err = ds->ops->port_mdb_prepare(ds, port, mdb, trans);
- if (err)
- return err;
- }
-
- return 0;
- }
+ if (switchdev_trans_ph_prepare(trans))
+ return dsa_switch_mdb_prepare_bitmap(ds, mdb, group);
- for_each_set_bit(port, group, ds->num_ports)
- ds->ops->port_mdb_add(ds, port, mdb, trans);
+ dsa_switch_mdb_add_bitmap(ds, mdb, group);
return 0;
}
@@ -157,13 +169,43 @@ static int dsa_switch_mdb_del(struct dsa_switch *ds,
return 0;
}
+static int
+dsa_switch_vlan_prepare_bitmap(struct dsa_switch *ds,
+ const struct switchdev_obj_port_vlan *vlan,
+ const unsigned long *bitmap)
+{
+ int port, err;
+
+ if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
+ return -EOPNOTSUPP;
+
+ for_each_set_bit(port, bitmap, ds->num_ports) {
+ err = ds->ops->port_vlan_prepare(ds, port, vlan);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void
+dsa_switch_vlan_add_bitmap(struct dsa_switch *ds,
+ const struct switchdev_obj_port_vlan *vlan,
+ const unsigned long *bitmap)
+{
+ int port;
+
+ for_each_set_bit(port, bitmap, ds->num_ports)
+ ds->ops->port_vlan_add(ds, port, vlan);
+}
+
static int dsa_switch_vlan_add(struct dsa_switch *ds,
struct dsa_notifier_vlan_info *info)
{
const struct switchdev_obj_port_vlan *vlan = info->vlan;
struct switchdev_trans *trans = info->trans;
DECLARE_BITMAP(members, ds->num_ports);
- int port, err;
+ int port;
/* Build a mask of VLAN members */
bitmap_zero(members, ds->num_ports);
@@ -173,21 +215,10 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
set_bit(port, members);
- if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
- return -EOPNOTSUPP;
-
- for_each_set_bit(port, members, ds->num_ports) {
- err = ds->ops->port_vlan_prepare(ds, port, vlan, trans);
- if (err)
- return err;
- }
-
- return 0;
- }
+ if (switchdev_trans_ph_prepare(trans))
+ return dsa_switch_vlan_prepare_bitmap(ds, vlan, members);
- for_each_set_bit(port, members, ds->num_ports)
- ds->ops->port_vlan_add(ds, port, vlan, trans);
+ dsa_switch_vlan_add_bitmap(ds, vlan, members);
return 0;
}
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index e6e0b7b6025c..2b06bb91318b 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -70,6 +70,18 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
if (skb_cow_head(skb, BRCM_TAG_LEN) < 0)
return NULL;
+ /* The Ethernet switch we are interfaced with needs packets to be at
+ * least 64 bytes (including FCS) otherwise they will be discarded when
+ * they enter the switch port logic. When Broadcom tags are enabled, we
+ * need to make sure that packets are at least 68 bytes
+ * (including FCS and tag) because the length verification is done after
+ * the Broadcom tag is stripped off the ingress packet.
+ *
+ * Let dsa_slave_xmit() free the SKB
+ */
+ if (__skb_put_padto(skb, ETH_ZLEN + BRCM_TAG_LEN, false))
+ return NULL;
+
skb_push(skb, BRCM_TAG_LEN);
if (offset)
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 8475434af7d5..11535bc70743 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -13,10 +13,13 @@
*/
#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
#include "dsa_priv.h"
#define MTK_HDR_LEN 4
+#define MTK_HDR_XMIT_UNTAGGED 0
+#define MTK_HDR_XMIT_TAGGED_TPID_8100 1
#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0)
@@ -25,20 +28,37 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
{
struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *mtk_tag;
+ bool is_vlan_skb = true;
- if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
- return NULL;
-
- skb_push(skb, MTK_HDR_LEN);
+ /* Build the special tag after the MAC Source Address. If VLAN header
+ * is present, it's required that VLAN header and special tag is
+ * being combined. Only in this way we can allow the switch can parse
+ * the both special and VLAN tag at the same time and then look up VLAN
+ * table with VID.
+ */
+ if (!skb_vlan_tagged(skb)) {
+ if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
+ return NULL;
- memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
+ skb_push(skb, MTK_HDR_LEN);
+ memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
+ is_vlan_skb = false;
+ }
- /* Build the tag after the MAC Source Address */
mtk_tag = skb->data + 2 * ETH_ALEN;
- mtk_tag[0] = 0;
+
+ /* Mark tag attribute on special tag insertion to notify hardware
+ * whether that's a combined special tag with 802.1Q header.
+ */
+ mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 :
+ MTK_HDR_XMIT_UNTAGGED;
mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
- mtk_tag[2] = 0;
- mtk_tag[3] = 0;
+
+ /* Tag control information is kept for 802.1Q */
+ if (!is_vlan_skb) {
+ mtk_tag[2] = 0;
+ mtk_tag[3] = 0;
+ }
return skb;
}
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index c6c8ad1d4b6d..47a0a6649a9d 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -43,7 +43,6 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o
obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
-obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f00499a46927..c24008daa3d8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -121,6 +121,7 @@
#endif
#include <net/l3mdev.h>
+#include <trace/events/sock.h>
/* The inetsw table contains everything that inet_create needs to
* build a new socket.
@@ -789,7 +790,8 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int addr_len = 0;
int err;
- sock_rps_record_flow(sk);
+ if (likely(!(flags & MSG_ERRQUEUE)))
+ sock_rps_record_flow(sk);
err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
flags & ~MSG_DONTWAIT, &addr_len);
@@ -870,6 +872,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct sock *sk = sock->sk;
int err = 0;
struct net *net = sock_net(sk);
+ void __user *p = (void __user *)arg;
+ struct ifreq ifr;
+ struct rtentry rt;
switch (cmd) {
case SIOCGSTAMP:
@@ -880,8 +885,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
case SIOCADDRT:
case SIOCDELRT:
+ if (copy_from_user(&rt, p, sizeof(struct rtentry)))
+ return -EFAULT;
+ err = ip_rt_ioctl(net, cmd, &rt);
+ break;
case SIOCRTMSG:
- err = ip_rt_ioctl(net, cmd, (void __user *)arg);
+ err = -EINVAL;
break;
case SIOCDARP:
case SIOCGARP:
@@ -889,17 +898,26 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
err = arp_ioctl(net, cmd, (void __user *)arg);
break;
case SIOCGIFADDR:
- case SIOCSIFADDR:
case SIOCGIFBRDADDR:
- case SIOCSIFBRDADDR:
case SIOCGIFNETMASK:
- case SIOCSIFNETMASK:
case SIOCGIFDSTADDR:
+ case SIOCGIFPFLAGS:
+ if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = devinet_ioctl(net, cmd, &ifr);
+ if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq)))
+ err = -EFAULT;
+ break;
+
+ case SIOCSIFADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCSIFNETMASK:
case SIOCSIFDSTADDR:
case SIOCSIFPFLAGS:
- case SIOCGIFPFLAGS:
case SIOCSIFFLAGS:
- err = devinet_ioctl(net, cmd, (void __user *)arg);
+ if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = devinet_ioctl(net, cmd, &ifr);
break;
default:
if (sk->sk_prot->ioctl)
@@ -1220,6 +1238,19 @@ int inet_sk_rebuild_header(struct sock *sk)
}
EXPORT_SYMBOL(inet_sk_rebuild_header);
+void inet_sk_set_state(struct sock *sk, int state)
+{
+ trace_inet_sock_set_state(sk, sk->sk_state, state);
+ sk->sk_state = state;
+}
+EXPORT_SYMBOL(inet_sk_set_state);
+
+void inet_sk_state_store(struct sock *sk, int newstate)
+{
+ trace_inet_sock_set_state(sk, sk->sk_state, newstate);
+ smp_store_release(&sk->sk_state, newstate);
+}
+
struct sk_buff *inet_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a8d7c5a9fb05..f28f06c91ead 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey)
static int arp_constructor(struct neighbour *neigh)
{
- __be32 addr = *(__be32 *)neigh->primary_key;
+ __be32 addr;
struct net_device *dev = neigh->dev;
struct in_device *in_dev;
struct neigh_parms *parms;
+ u32 inaddr_any = INADDR_ANY;
+ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+ memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len);
+
+ addr = *(__be32 *)neigh->primary_key;
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (!in_dev) {
@@ -1420,7 +1425,6 @@ static int arp_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations arp_seq_fops = {
- .owner = THIS_MODULE,
.open = arp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a4573bccd6da..40f001782c1b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -946,11 +946,10 @@ static int inet_abc_len(__be32 addr)
}
-int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
- struct ifreq ifr;
struct sockaddr_in sin_orig;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
+ struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
struct in_device *in_dev;
struct in_ifaddr **ifap = NULL;
struct in_ifaddr *ifa = NULL;
@@ -959,22 +958,16 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
int ret = -EFAULT;
int tryaddrmatch = 0;
- /*
- * Fetch the caller's info block into kernel space
- */
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- goto out;
- ifr.ifr_name[IFNAMSIZ - 1] = 0;
+ ifr->ifr_name[IFNAMSIZ - 1] = 0;
/* save original address for comparison */
memcpy(&sin_orig, sin, sizeof(*sin));
- colon = strchr(ifr.ifr_name, ':');
+ colon = strchr(ifr->ifr_name, ':');
if (colon)
*colon = 0;
- dev_load(net, ifr.ifr_name);
+ dev_load(net, ifr->ifr_name);
switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
@@ -1014,7 +1007,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
rtnl_lock();
ret = -ENODEV;
- dev = __dev_get_by_name(net, ifr.ifr_name);
+ dev = __dev_get_by_name(net, ifr->ifr_name);
if (!dev)
goto done;
@@ -1031,7 +1024,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
This is checked above. */
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next) {
- if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
+ if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
sin_orig.sin_addr.s_addr ==
ifa->ifa_local) {
break; /* found */
@@ -1044,7 +1037,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!ifa) {
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next)
- if (!strcmp(ifr.ifr_name, ifa->ifa_label))
+ if (!strcmp(ifr->ifr_name, ifa->ifa_label))
break;
}
}
@@ -1055,20 +1048,24 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
+ ret = 0;
sin->sin_addr.s_addr = ifa->ifa_local;
- goto rarok;
+ break;
case SIOCGIFBRDADDR: /* Get the broadcast address */
+ ret = 0;
sin->sin_addr.s_addr = ifa->ifa_broadcast;
- goto rarok;
+ break;
case SIOCGIFDSTADDR: /* Get the destination address */
+ ret = 0;
sin->sin_addr.s_addr = ifa->ifa_address;
- goto rarok;
+ break;
case SIOCGIFNETMASK: /* Get the netmask for the interface */
+ ret = 0;
sin->sin_addr.s_addr = ifa->ifa_mask;
- goto rarok;
+ break;
case SIOCSIFFLAGS:
if (colon) {
@@ -1076,11 +1073,11 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!ifa)
break;
ret = 0;
- if (!(ifr.ifr_flags & IFF_UP))
+ if (!(ifr->ifr_flags & IFF_UP))
inet_del_ifa(in_dev, ifap, 1);
break;
}
- ret = dev_change_flags(dev, ifr.ifr_flags);
+ ret = dev_change_flags(dev, ifr->ifr_flags);
break;
case SIOCSIFADDR: /* Set interface address (and family) */
@@ -1095,7 +1092,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
break;
INIT_HLIST_NODE(&ifa->hash);
if (colon)
- memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
+ memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
} else {
@@ -1182,28 +1179,27 @@ done:
rtnl_unlock();
out:
return ret;
-rarok:
- rtnl_unlock();
- ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
- goto out;
}
-static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
+static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
struct in_ifaddr *ifa;
struct ifreq ifr;
int done = 0;
+ if (WARN_ON(size > sizeof(struct ifreq)))
+ goto out;
+
if (!in_dev)
goto out;
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
if (!buf) {
- done += sizeof(ifr);
+ done += size;
continue;
}
- if (len < (int) sizeof(ifr))
+ if (len < size)
break;
memset(&ifr, 0, sizeof(struct ifreq));
strcpy(ifr.ifr_name, ifa->ifa_label);
@@ -1212,13 +1208,12 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
ifa->ifa_local;
- if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
+ if (copy_to_user(buf + done, &ifr, size)) {
done = -EFAULT;
break;
}
- buf += sizeof(struct ifreq);
- len -= sizeof(struct ifreq);
- done += sizeof(struct ifreq);
+ len -= size;
+ done += size;
}
out:
return done;
@@ -1428,7 +1423,7 @@ skip:
static bool inetdev_valid_mtu(unsigned int mtu)
{
- return mtu >= 68;
+ return mtu >= IPV4_MIN_MTU;
}
static void inetdev_send_gratuitous_arp(struct net_device *dev,
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d57aa64fa7c7..296d0b956bfe 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -121,14 +121,32 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
+ struct xfrm_offload *xo = xfrm_offload(skb);
void *tmp;
- struct dst_entry *dst = skb_dst(skb);
- struct xfrm_state *x = dst->xfrm;
+ struct xfrm_state *x;
+
+ if (xo && (xo->flags & XFRM_DEV_RESUME))
+ x = skb->sp->xvec[skb->sp->len - 1];
+ else
+ x = skb_dst(skb)->xfrm;
tmp = ESP_SKB_CB(skb)->tmp;
esp_ssg_unref(x, tmp);
kfree(tmp);
- xfrm_output_resume(skb, err);
+
+ if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+ if (err) {
+ XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+ kfree_skb(skb);
+ return;
+ }
+
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ secpath_reset(skb);
+ xfrm_dev_resume(skb);
+ } else {
+ xfrm_output_resume(skb, err);
+ }
}
/* Move ESP header back into place. */
@@ -825,17 +843,13 @@ static int esp_init_aead(struct xfrm_state *x)
char aead_name[CRYPTO_MAX_ALG_NAME];
struct crypto_aead *aead;
int err;
- u32 mask = 0;
err = -ENAMETOOLONG;
if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
goto error;
- if (x->xso.offload_handle)
- mask |= CRYPTO_ALG_ASYNC;
-
- aead = crypto_alloc_aead(aead_name, 0, mask);
+ aead = crypto_alloc_aead(aead_name, 0, 0);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -865,7 +879,6 @@ static int esp_init_authenc(struct xfrm_state *x)
char authenc_name[CRYPTO_MAX_ALG_NAME];
unsigned int keylen;
int err;
- u32 mask = 0;
err = -EINVAL;
if (!x->ealg)
@@ -891,10 +904,7 @@ static int esp_init_authenc(struct xfrm_state *x)
goto error;
}
- if (x->xso.offload_handle)
- mask |= CRYPTO_ALG_ASYNC;
-
- aead = crypto_alloc_aead(authenc_name, 0, mask);
+ aead = crypto_alloc_aead(authenc_name, 0, 0);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -981,6 +991,7 @@ static int esp_init_state(struct xfrm_state *x)
switch (encap->encap_type) {
default:
+ err = -EINVAL;
goto error;
case UDP_ENCAP_ESPINUDP:
x->props.header_len += sizeof(struct udphdr);
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index f8b918c766b0..da5635fc52c2 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -38,7 +38,8 @@ static struct sk_buff **esp4_gro_receive(struct sk_buff **head,
__be32 spi;
int err;
- skb_pull(skb, offset);
+ if (!pskb_pull(skb, offset))
+ return NULL;
if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
goto out;
@@ -108,75 +109,39 @@ static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
- __u32 seq;
- int err = 0;
- struct sk_buff *skb2;
struct xfrm_state *x;
struct ip_esp_hdr *esph;
struct crypto_aead *aead;
- struct sk_buff *segs = ERR_PTR(-EINVAL);
netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
if (!xo)
- goto out;
+ return ERR_PTR(-EINVAL);
- seq = xo->seq.low;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
+ return ERR_PTR(-EINVAL);
x = skb->sp->xvec[skb->sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);
if (esph->spi != x->id.spi)
- goto out;
+ return ERR_PTR(-EINVAL);
if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
- goto out;
+ return ERR_PTR(-EINVAL);
__skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
skb->encap_hdr_csum = 1;
- if (!(features & NETIF_F_HW_ESP))
+ if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
+ (x->xso.dev != skb->dev))
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
- segs = x->outer_mode->gso_segment(x, skb, esp_features);
- if (IS_ERR_OR_NULL(segs))
- goto out;
-
- __skb_pull(skb, skb->data - skb_mac_header(skb));
-
- skb2 = segs;
- do {
- struct sk_buff *nskb = skb2->next;
-
- xo = xfrm_offload(skb2);
- xo->flags |= XFRM_GSO_SEGMENT;
- xo->seq.low = seq;
- xo->seq.hi = xfrm_replay_seqhi(x, seq);
-
- if(!(features & NETIF_F_HW_ESP))
- xo->flags |= CRYPTO_FALLBACK;
-
- x->outer_mode->xmit(x, skb2);
-
- err = x->type_offload->xmit(x, skb2, esp_features);
- if (err) {
- kfree_skb_list(segs);
- return ERR_PTR(err);
- }
-
- if (!skb_is_gso(skb2))
- seq++;
- else
- seq += skb_shinfo(skb2)->gso_segs;
-
- skb_push(skb2, skb2->mac_len);
- skb2 = nskb;
- } while (skb2);
+ xo->flags |= XFRM_GSO_SEGMENT;
-out:
- return segs;
+ return x->outer_mode->gso_segment(x, skb, esp_features);
}
static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb)
@@ -203,6 +168,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
struct crypto_aead *aead;
struct esp_info esp;
bool hw_offload = true;
+ __u32 seq;
esp.inplace = true;
@@ -241,23 +207,30 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
return esp.nfrags;
}
+ seq = xo->seq.low;
+
esph = esp.esph;
esph->spi = x->id.spi;
skb_push(skb, -skb_network_offset(skb));
if (xo->flags & XFRM_GSO_SEGMENT) {
- esph->seq_no = htonl(xo->seq.low);
- } else {
- ip_hdr(skb)->tot_len = htons(skb->len);
- ip_send_check(ip_hdr(skb));
+ esph->seq_no = htonl(seq);
+
+ if (!skb_is_gso(skb))
+ xo->seq.low++;
+ else
+ xo->seq.low += skb_shinfo(skb)->gso_segs;
}
+ esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32));
+
+ ip_hdr(skb)->tot_len = htons(skb->len);
+ ip_send_check(ip_hdr(skb));
+
if (hw_offload)
return 0;
- esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
-
err = esp_output_tail(x, skb, &esp);
if (err)
return err;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f52d27a422c3..f05afaf3235c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -587,10 +587,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
* Handle IP routing ioctl calls.
* These are used to manipulate the routing tables
*/
-int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
{
struct fib_config cfg;
- struct rtentry rt;
int err;
switch (cmd) {
@@ -599,11 +598,8 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- if (copy_from_user(&rt, arg, sizeof(rt)))
- return -EFAULT;
-
rtnl_lock();
- err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
+ err = rtentry_to_fib_config(net, cmd, rt, &cfg);
if (err == 0) {
struct fib_table *tb;
@@ -1298,14 +1294,19 @@ err_table_hash_alloc:
static void ip_fib_net_exit(struct net *net)
{
- unsigned int i;
+ int i;
rtnl_lock();
#ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
#endif
- for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+ /* Destroy the tables in reverse order to guarantee that the
+ * local table, ID 255, is destroyed before the main table, ID
+ * 254. This is necessary as the local table may contain
+ * references to data contained in the main table.
+ */
+ for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
struct hlist_head *head = &net->ipv4.fib_table_hash[i];
struct hlist_node *tmp;
struct fib_table *tb;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f04d944f8abe..c586597da20d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
int type = nla_type(nla);
- u32 val;
+ u32 fi_val, val;
if (!type)
continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
val = nla_get_u32(nla);
}
- if (fi->fib_metrics->metrics[type - 1] != val)
+ fi_val = fi->fib_metrics->metrics[type - 1];
+ if (type == RTAX_FEATURES)
+ fi_val &= ~DST_FEATURE_ECN_CA;
+
+ if (fi_val != val)
return false;
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5ddc4aefff12..5530cd6fdbc7 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2334,7 +2334,6 @@ static int fib_triestat_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations fib_triestat_fops = {
- .owner = THIS_MODULE,
.open = fib_triestat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -2521,7 +2520,6 @@ static int fib_trie_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations fib_trie_fops = {
- .owner = THIS_MODULE,
.open = fib_trie_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -2715,7 +2713,6 @@ static int fib_route_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations fib_route_fops = {
- .owner = THIS_MODULE,
.open = fib_route_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d1f8f302dbf3..f2402581fef1 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -89,6 +89,7 @@
#include <linux/rtnetlink.h>
#include <linux/times.h>
#include <linux/pkt_sched.h>
+#include <linux/byteorder/generic.h>
#include <net/net_namespace.h>
#include <net/arp.h>
@@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
+/* source address selection per RFC 3376 section 4.2.13 */
+static __be32 igmpv3_get_srcaddr(struct net_device *dev,
+ const struct flowi4 *fl4)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (!in_dev)
+ return htonl(INADDR_ANY);
+
+ for_ifa(in_dev) {
+ if (fl4->saddr == ifa->ifa_local)
+ return fl4->saddr;
+ } endfor_ifa(in_dev);
+
+ return htonl(INADDR_ANY);
+}
+
static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
{
struct sk_buff *skb;
@@ -368,7 +386,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
pip->frag_off = htons(IP_DF);
pip->ttl = 1;
pip->daddr = fl4.daddr;
- pip->saddr = fl4.saddr;
+
+ rcu_read_lock();
+ pip->saddr = igmpv3_get_srcaddr(dev, &fl4);
+ rcu_read_unlock();
+
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
ip_select_ident(net, skb, NULL);
@@ -404,16 +426,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
- int type, struct igmpv3_grec **ppgr)
+ int type, struct igmpv3_grec **ppgr, unsigned int mtu)
{
struct net_device *dev = pmc->interface->dev;
struct igmpv3_report *pih;
struct igmpv3_grec *pgr;
- if (!skb)
- skb = igmpv3_newpack(dev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = igmpv3_newpack(dev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = skb_put(skb, sizeof(struct igmpv3_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -436,12 +459,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
struct igmpv3_grec *pgr = NULL;
struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV4_MIN_MTU)
+ return skb;
+
isquery = type == IGMPV3_MODE_IS_INCLUDE ||
type == IGMPV3_MODE_IS_EXCLUDE;
truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -462,7 +490,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
}
}
first = 1;
@@ -498,12 +526,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
pgr->grec_nsrcs = htons(scount);
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -538,7 +566,7 @@ empty_source:
igmpv3_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
@@ -2808,7 +2836,6 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations igmp_mc_seq_fops = {
- .owner = THIS_MODULE,
.open = igmp_mc_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -2955,7 +2982,6 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations igmp_mcf_seq_fops = {
- .owner = THIS_MODULE,
.open = igmp_mcf_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4ca46dc08e63..881ac6d046f2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -475,7 +475,6 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
}
spin_unlock_bh(&queue->fastopenq.lock);
}
- mem_cgroup_sk_alloc(newsk);
out:
release_sock(sk);
if (req)
@@ -685,7 +684,7 @@ static void reqsk_timer_handler(struct timer_list *t)
int max_retries, thresh;
u8 defer_accept;
- if (sk_state_load(sk_listener) != TCP_LISTEN)
+ if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
goto drop;
max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
@@ -783,7 +782,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);
- newsk->sk_state = TCP_SYN_RECV;
+ inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
@@ -877,7 +876,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
* It is OK, because this socket enters to hash table only
* after validation is complete.
*/
- sk_state_store(sk, TCP_LISTEN);
+ inet_sk_state_store(sk, TCP_LISTEN);
if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
inet->inet_sport = htons(inet->inet_num);
@@ -888,7 +887,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
return 0;
}
- sk->sk_state = TCP_CLOSE;
+ inet_sk_set_state(sk, TCP_CLOSE);
return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index c9c35b61a027..a383f299ce24 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -564,12 +564,18 @@ static int inet_diag_bc_run(const struct nlattr *_bc,
case INET_DIAG_BC_JMP:
yes = 0;
break;
+ case INET_DIAG_BC_S_EQ:
+ yes = entry->sport == op[1].no;
+ break;
case INET_DIAG_BC_S_GE:
yes = entry->sport >= op[1].no;
break;
case INET_DIAG_BC_S_LE:
yes = entry->sport <= op[1].no;
break;
+ case INET_DIAG_BC_D_EQ:
+ yes = entry->dport == op[1].no;
+ break;
case INET_DIAG_BC_D_GE:
yes = entry->dport >= op[1].no;
break;
@@ -802,8 +808,10 @@ static int inet_diag_bc_audit(const struct nlattr *attr,
if (!valid_devcond(bc, len, &min_len))
return -EINVAL;
break;
+ case INET_DIAG_BC_S_EQ:
case INET_DIAG_BC_S_GE:
case INET_DIAG_BC_S_LE:
+ case INET_DIAG_BC_D_EQ:
case INET_DIAG_BC_D_GE:
case INET_DIAG_BC_D_LE:
if (!valid_port_comparison(bc, len, &min_len))
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e7d15fb0d94d..31ff46daae97 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -19,6 +19,7 @@
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>
+#include <linux/bootmem.h>
#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
@@ -168,6 +169,60 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
+static struct inet_listen_hashbucket *
+inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
+{
+ u32 hash;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ hash = ipv6_portaddr_hash(sock_net(sk),
+ &sk->sk_v6_rcv_saddr,
+ inet_sk(sk)->inet_num);
+ else
+#endif
+ hash = ipv4_portaddr_hash(sock_net(sk),
+ inet_sk(sk)->inet_rcv_saddr,
+ inet_sk(sk)->inet_num);
+ return inet_lhash2_bucket(h, hash);
+}
+
+static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
+{
+ struct inet_listen_hashbucket *ilb2;
+
+ if (!h->lhash2)
+ return;
+
+ ilb2 = inet_lhash2_bucket_sk(h, sk);
+
+ spin_lock(&ilb2->lock);
+ if (sk->sk_reuseport && sk->sk_family == AF_INET6)
+ hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
+ &ilb2->head);
+ else
+ hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
+ &ilb2->head);
+ ilb2->count++;
+ spin_unlock(&ilb2->lock);
+}
+
+static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
+{
+ struct inet_listen_hashbucket *ilb2;
+
+ if (!h->lhash2 ||
+ WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node)))
+ return;
+
+ ilb2 = inet_lhash2_bucket_sk(h, sk);
+
+ spin_lock(&ilb2->lock);
+ hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node);
+ ilb2->count--;
+ spin_unlock(&ilb2->lock);
+}
+
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum, const __be32 daddr,
const int dif, const int sdif, bool exact_dif)
@@ -207,6 +262,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
*/
/* called with rcu_read_lock() : No refcount taken on the socket */
+static struct sock *inet_lhash2_lookup(struct net *net,
+ struct inet_listen_hashbucket *ilb2,
+ struct sk_buff *skb, int doff,
+ const __be32 saddr, __be16 sport,
+ const __be32 daddr, const unsigned short hnum,
+ const int dif, const int sdif)
+{
+ bool exact_dif = inet_exact_dif_match(net, skb);
+ struct inet_connection_sock *icsk;
+ struct sock *sk, *result = NULL;
+ int score, hiscore = 0;
+ u32 phash = 0;
+
+ inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
+ sk = (struct sock *)icsk;
+ score = compute_score(sk, net, hnum, daddr,
+ dif, sdif, exact_dif);
+ if (score > hiscore) {
+ if (sk->sk_reuseport) {
+ phash = inet_ehashfn(net, daddr, hnum,
+ saddr, sport);
+ result = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (result)
+ return result;
+ }
+ result = sk;
+ hiscore = score;
+ }
+ }
+
+ return result;
+}
+
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -216,32 +305,57 @@ struct sock *__inet_lookup_listener(struct net *net,
{
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
- int score, hiscore = 0, matches = 0, reuseport = 0;
bool exact_dif = inet_exact_dif_match(net, skb);
+ struct inet_listen_hashbucket *ilb2;
struct sock *sk, *result = NULL;
+ int score, hiscore = 0;
+ unsigned int hash2;
u32 phash = 0;
+ if (ilb->count <= 10 || !hashinfo->lhash2)
+ goto port_lookup;
+
+ /* Too many sk in the ilb bucket (which is hashed by port alone).
+ * Try lhash2 (which is hashed by port and addr) instead.
+ */
+
+ hash2 = ipv4_portaddr_hash(net, daddr, hnum);
+ ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+ if (ilb2->count > ilb->count)
+ goto port_lookup;
+
+ result = inet_lhash2_lookup(net, ilb2, skb, doff,
+ saddr, sport, daddr, hnum,
+ dif, sdif);
+ if (result)
+ return result;
+
+ /* Lookup lhash2 with INADDR_ANY */
+
+ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
+ ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+ if (ilb2->count > ilb->count)
+ goto port_lookup;
+
+ return inet_lhash2_lookup(net, ilb2, skb, doff,
+ saddr, sport, daddr, hnum,
+ dif, sdif);
+
+port_lookup:
sk_for_each_rcu(sk, &ilb->head) {
score = compute_score(sk, net, hnum, daddr,
dif, sdif, exact_dif);
if (score > hiscore) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
phash = inet_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, phash,
skb, doff);
if (result)
return result;
- matches = 1;
}
result = sk;
hiscore = score;
- } else if (score == hiscore && reuseport) {
- matches++;
- if (reciprocal_scale(phash, matches) == 0)
- result = sk;
- phash = next_pseudo_random32(phash);
}
}
return result;
@@ -430,7 +544,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
} else {
percpu_counter_inc(sk->sk_prot->orphan_count);
- sk->sk_state = TCP_CLOSE;
+ inet_sk_set_state(sk, TCP_CLOSE);
sock_set_flag(sk, SOCK_DEAD);
inet_csk_destroy_sock(sk);
}
@@ -483,6 +597,8 @@ int __inet_hash(struct sock *sk, struct sock *osk)
hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
else
hlist_add_head_rcu(&sk->sk_node, &ilb->head);
+ inet_hash2(hashinfo, sk);
+ ilb->count++;
sock_set_flag(sk, SOCK_RCU_FREE);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
@@ -509,28 +625,33 @@ EXPORT_SYMBOL_GPL(inet_hash);
void inet_unhash(struct sock *sk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+ struct inet_listen_hashbucket *ilb = NULL;
spinlock_t *lock;
- bool listener = false;
- int done;
if (sk_unhashed(sk))
return;
if (sk->sk_state == TCP_LISTEN) {
- lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
- listener = true;
+ ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ lock = &ilb->lock;
} else {
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
}
spin_lock_bh(lock);
+ if (sk_unhashed(sk))
+ goto unlock;
+
if (rcu_access_pointer(sk->sk_reuseport_cb))
reuseport_detach_sock(sk);
- if (listener)
- done = __sk_del_node_init(sk);
- else
- done = __sk_nulls_del_node_init_rcu(sk);
- if (done)
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ if (ilb) {
+ inet_unhash2(hashinfo, sk);
+ __sk_del_node_init(sk);
+ ilb->count--;
+ } else {
+ __sk_nulls_del_node_init_rcu(sk);
+ }
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+unlock:
spin_unlock_bh(lock);
}
EXPORT_SYMBOL_GPL(inet_unhash);
@@ -665,10 +786,37 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
spin_lock_init(&h->listening_hash[i].lock);
INIT_HLIST_HEAD(&h->listening_hash[i].head);
+ h->listening_hash[i].count = 0;
}
+
+ h->lhash2 = NULL;
}
EXPORT_SYMBOL_GPL(inet_hashinfo_init);
+void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
+ unsigned long numentries, int scale,
+ unsigned long low_limit,
+ unsigned long high_limit)
+{
+ unsigned int i;
+
+ h->lhash2 = alloc_large_system_hash(name,
+ sizeof(*h->lhash2),
+ numentries,
+ scale,
+ 0,
+ NULL,
+ &h->lhash2_mask,
+ low_limit,
+ high_limit);
+
+ for (i = 0; i <= h->lhash2_mask; i++) {
+ spin_lock_init(&h->lhash2[i].lock);
+ INIT_HLIST_HEAD(&h->lhash2[i].head);
+ h->lhash2[i].count = 0;
+ }
+}
+
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
unsigned int locksz = sizeof(spinlock_t);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c690cd0d9b3f..c3ea4906d237 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -93,11 +93,11 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
}
/*
- * Enter the time wait state.
+ * Enter the time wait state. This is called with locally disabled BH.
* Essentially we whip up a timewait bucket, copy the relevant info into it
* from the SK, and mess with hash chains and list linkage.
*/
-void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
+void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
struct inet_hashinfo *hashinfo)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -111,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
*/
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
hashinfo->bhash_size)];
- spin_lock_bh(&bhead->lock);
+ spin_lock(&bhead->lock);
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
@@ -119,27 +119,26 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
spin_lock(lock);
- /*
- * Step 2: Hash TW into tcp ehash chain.
- * Notes :
- * - tw_refcnt is set to 4 because :
- * - We have one reference from bhash chain.
- * - We have one reference from ehash chain.
- * - We have one reference from timer.
- * - One reference for ourself (our caller will release it).
- * We can use atomic_set() because prior spin_lock()/spin_unlock()
- * committed into memory all tw fields.
- */
- refcount_set(&tw->tw_refcnt, 4);
inet_twsk_add_node_rcu(tw, &ehead->chain);
/* Step 3: Remove SK from hash chain */
if (__sk_nulls_del_node_init_rcu(sk))
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
- spin_unlock_bh(lock);
+ spin_unlock(lock);
+
+ /* tw_refcnt is set to 3 because we have :
+ * - one reference for bhash chain.
+ * - one reference for ehash chain.
+ * - one reference for timer.
+ * We can use atomic_set() because prior spin_lock()/spin_unlock()
+ * committed into memory all tw fields.
+ * Also note that after this point, we lost our implicit reference
+ * so we are not allowed to use tw anymore.
+ */
+ refcount_set(&tw->tw_refcnt, 3);
}
-EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
+EXPORT_SYMBOL_GPL(inet_twsk_hashdance);
static void tw_timer_handler(struct timer_list *t)
{
@@ -271,14 +270,14 @@ restart:
continue;
tw = inet_twsk(sk);
if ((tw->tw_family != family) ||
- atomic_read(&twsk_net(tw)->count))
+ refcount_read(&twsk_net(tw)->count))
continue;
if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
continue;
if (unlikely((tw->tw_family != family) ||
- atomic_read(&twsk_net(tw)->count))) {
+ refcount_read(&twsk_net(tw)->count))) {
inet_twsk_put(tw);
goto restart;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bb6239169b1a..6ec670fbbbdd 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -114,7 +114,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
- __be32 id, u32 index, bool truncate);
+ u32 id, u32 index,
+ bool truncate, bool is_ipv4);
static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
@@ -255,34 +256,43 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
{
struct net *net = dev_net(skb->dev);
struct metadata_dst *tun_dst = NULL;
+ struct erspan_base_hdr *ershdr;
+ struct erspan_metadata *pkt_md;
struct ip_tunnel_net *itn;
struct ip_tunnel *tunnel;
- struct erspanhdr *ershdr;
const struct iphdr *iph;
- __be32 index;
+ int ver;
int len;
itn = net_generic(net, erspan_net_id);
len = gre_hdr_len + sizeof(*ershdr);
+ /* Check based hdr len */
if (unlikely(!pskb_may_pull(skb, len)))
- return -ENOMEM;
+ return PACKET_REJECT;
iph = ip_hdr(skb);
- ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
+ ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
+ ver = ershdr->ver;
/* The original GRE header does not have key field,
* Use ERSPAN 10-bit session ID as key.
*/
- tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
- index = ershdr->md.index;
+ tpi->key = cpu_to_be32(get_session_id(ershdr));
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
if (tunnel) {
+ len = gre_hdr_len + erspan_hdr_len(ver);
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return PACKET_REJECT;
+
+ ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
+ pkt_md = (struct erspan_metadata *)(ershdr + 1);
+
if (__iptunnel_pull_header(skb,
- gre_hdr_len + sizeof(*ershdr),
+ len,
htons(ETH_P_TEB),
false, false) < 0)
goto drop;
@@ -303,15 +313,21 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
return PACKET_REJECT;
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
- if (!md)
- return PACKET_REJECT;
+ memcpy(md, pkt_md, sizeof(*md));
+ md->version = ver;
- md->index = index;
info = &tun_dst->u.tun_info;
info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
info->options_len = sizeof(*md);
} else {
- tunnel->index = ntohl(index);
+ tunnel->erspan_ver = ver;
+ if (ver == 1) {
+ tunnel->index = ntohl(pkt_md->u.index);
+ } else {
+ tunnel->dir = pkt_md->u.md2.dir;
+ tunnel->hwid = get_hwid(&pkt_md->u.md2);
+ }
+
}
skb_reset_mac_header(skb);
@@ -405,14 +421,17 @@ static int gre_rcv(struct sk_buff *skb)
if (hdr_len < 0)
goto drop;
- if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
+ if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
+ tpi.proto == htons(ETH_P_ERSPAN2))) {
if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
+ goto out;
}
if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
+out:
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
kfree_skb(skb);
@@ -560,6 +579,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
bool truncate = false;
struct flowi4 fl;
int tunnel_hlen;
+ int version;
__be16 df;
tun_info = skb_tunnel_info(skb);
@@ -568,9 +588,13 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
goto err_free_skb;
key = &tun_info->key;
+ md = ip_tunnel_info_opts(tun_info);
+ if (!md)
+ goto err_free_rt;
/* ERSPAN has fixed 8 byte GRE header */
- tunnel_hlen = 8 + sizeof(struct erspanhdr);
+ version = md->version;
+ tunnel_hlen = 8 + erspan_hdr_len(version);
rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
if (!rt)
@@ -584,12 +608,18 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
truncate = true;
}
- md = ip_tunnel_info_opts(tun_info);
- if (!md)
+ if (version == 1) {
+ erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
+ ntohl(md->u.index), truncate, true);
+ } else if (version == 2) {
+ erspan_build_header_v2(skb,
+ ntohl(tunnel_id_to_key32(key->tun_id)),
+ md->u.md2.dir,
+ get_hwid(&md->u.md2),
+ truncate, true);
+ } else {
goto err_free_rt;
-
- erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
- ntohl(md->index), truncate);
+ }
gre_build_header(skb, 8, TUNNEL_SEQ,
htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
@@ -668,52 +698,6 @@ free_skb:
return NETDEV_TX_OK;
}
-static inline u8 tos_to_cos(u8 tos)
-{
- u8 dscp, cos;
-
- dscp = tos >> 2;
- cos = dscp >> 3;
- return cos;
-}
-
-static void erspan_build_header(struct sk_buff *skb,
- __be32 id, u32 index, bool truncate)
-{
- struct iphdr *iphdr = ip_hdr(skb);
- struct ethhdr *eth = eth_hdr(skb);
- enum erspan_encap_type enc_type;
- struct erspanhdr *ershdr;
- struct qtag_prefix {
- __be16 eth_type;
- __be16 tci;
- } *qp;
- u16 vlan_tci = 0;
-
- enc_type = ERSPAN_ENCAP_NOVLAN;
-
- /* If mirrored packet has vlan tag, extract tci and
- * perserve vlan header in the mirrored frame.
- */
- if (eth->h_proto == htons(ETH_P_8021Q)) {
- qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
- vlan_tci = ntohs(qp->tci);
- enc_type = ERSPAN_ENCAP_INFRAME;
- }
-
- skb_push(skb, sizeof(*ershdr));
- ershdr = (struct erspanhdr *)skb->data;
- memset(ershdr, 0, sizeof(*ershdr));
-
- ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
- (ERSPAN_VERSION << VER_OFFSET));
- ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
- ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
- (enc_type << EN_OFFSET & EN_MASK) |
- ((truncate << T_OFFSET) & T_MASK));
- ershdr->md.index = htonl(index & INDEX_MASK);
-}
-
static netdev_tx_t erspan_xmit(struct sk_buff *skb,
struct net_device *dev)
{
@@ -737,7 +721,15 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
}
/* Push ERSPAN header */
- erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
+ if (tunnel->erspan_ver == 1)
+ erspan_build_header(skb, ntohl(tunnel->parms.o_key),
+ tunnel->index,
+ truncate, true);
+ else
+ erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
+ tunnel->dir, tunnel->hwid,
+ truncate, true);
+
tunnel->parms.o_flags &= ~TUNNEL_KEY;
__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
return NETDEV_TX_OK;
@@ -1209,13 +1201,32 @@ static int ipgre_netlink_parms(struct net_device *dev,
if (data[IFLA_GRE_FWMARK])
*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
- if (data[IFLA_GRE_ERSPAN_INDEX]) {
- t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ if (data[IFLA_GRE_ERSPAN_VER]) {
+ t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
- if (t->index & ~INDEX_MASK)
+ if (t->erspan_ver != 1 && t->erspan_ver != 2)
return -EINVAL;
}
+ if (t->erspan_ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX]) {
+ t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ if (t->index & ~INDEX_MASK)
+ return -EINVAL;
+ }
+ } else if (t->erspan_ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR]) {
+ t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
+ return -EINVAL;
+ }
+ if (data[IFLA_GRE_ERSPAN_HWID]) {
+ t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -1282,7 +1293,7 @@ static int erspan_tunnel_init(struct net_device *dev)
tunnel->tun_hlen = 8;
tunnel->parms.iph.protocol = IPPROTO_GRE;
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
- sizeof(struct erspanhdr);
+ erspan_hdr_len(tunnel->erspan_ver);
t_hlen = tunnel->hlen + sizeof(struct iphdr);
dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
@@ -1310,6 +1321,7 @@ static const struct net_device_ops erspan_netdev_ops = {
static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
+ dev->max_mtu = 0;
dev->netdev_ops = &gre_tap_netdev_ops;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
@@ -1412,6 +1424,12 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(4) +
/* IFLA_GRE_ERSPAN_INDEX */
nla_total_size(4) +
+ /* IFLA_GRE_ERSPAN_VER */
+ nla_total_size(1) +
+ /* IFLA_GRE_ERSPAN_DIR */
+ nla_total_size(1) +
+ /* IFLA_GRE_ERSPAN_HWID */
+ nla_total_size(2) +
0;
}
@@ -1454,9 +1472,18 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
goto nla_put_failure;
}
- if (t->index)
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
+ goto nla_put_failure;
+
+ if (t->erspan_ver == 1) {
if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
goto nla_put_failure;
+ } else if (t->erspan_ver == 2) {
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
+ goto nla_put_failure;
+ }
return 0;
@@ -1492,6 +1519,9 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
[IFLA_GRE_FWMARK] = { .type = NLA_U32 },
[IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
+ [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 60fb1eb7d7d8..008be04ac1cc 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -808,6 +808,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
{
struct net_device *dev = NULL;
int ifindex;
+ int midx;
if (optlen != sizeof(int))
goto e_inval;
@@ -823,10 +824,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EADDRNOTAVAIL;
if (!dev)
break;
+
+ midx = l3mdev_master_ifindex(dev);
dev_put(dev);
err = -EINVAL;
- if (sk->sk_bound_dev_if)
+ if (sk->sk_bound_dev_if &&
+ (!midx || midx != sk->sk_bound_dev_if))
break;
inet->uc_index = ifindex;
@@ -1251,11 +1255,8 @@ int ip_setsockopt(struct sock *sk, int level,
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
optname != IP_IPSEC_POLICY &&
optname != IP_XFRM_POLICY &&
- !ip_mroute_opt(optname)) {
- lock_sock(sk);
+ !ip_mroute_opt(optname))
err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
- release_sock(sk);
- }
#endif
return err;
}
@@ -1280,12 +1281,9 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
optname != IP_IPSEC_POLICY &&
optname != IP_XFRM_POLICY &&
- !ip_mroute_opt(optname)) {
- lock_sock(sk);
- err = compat_nf_setsockopt(sk, PF_INET, optname,
- optval, optlen);
- release_sock(sk);
- }
+ !ip_mroute_opt(optname))
+ err = compat_nf_setsockopt(sk, PF_INET, optname, optval,
+ optlen);
#endif
return err;
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index fe6fee728ce4..d786a8441bce 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -349,8 +349,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
dev->needed_headroom = t_hlen + hlen;
mtu -= (dev->hard_header_len + t_hlen);
- if (mtu < 68)
- mtu = 68;
+ if (mtu < IPV4_MIN_MTU)
+ mtu = IPV4_MIN_MTU;
return mtu;
}
@@ -520,8 +520,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
else
mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
- if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) {
if (!skb_is_gso(skb) &&
@@ -711,9 +710,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
+ if (tunnel->fwmark) {
+ init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->fwmark);
+ }
+ else {
+ init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ skb->mark);
+ }
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 949f432a5f04..51b1669334fe 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -200,7 +200,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
mtu = dst_mtu(dst);
if (skb->len > mtu) {
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index abdebca848c9..f75802ad960f 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -329,39 +329,6 @@ set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port)
sin->sin_port = port;
}
-static int __init ic_devinet_ioctl(unsigned int cmd, struct ifreq *arg)
-{
- int res;
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
- res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
- set_fs(oldfs);
- return res;
-}
-
-static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
-{
- int res;
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
- res = dev_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
- set_fs(oldfs);
- return res;
-}
-
-static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg)
-{
- int res;
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
- res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg);
- set_fs(oldfs);
- return res;
-}
-
/*
* Set up interface addresses and routes.
*/
@@ -375,19 +342,19 @@ static int __init ic_setup_if(void)
memset(&ir, 0, sizeof(ir));
strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name);
set_sockaddr(sin, ic_myaddr, 0);
- if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) {
+ if ((err = devinet_ioctl(&init_net, SIOCSIFADDR, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface address (%d)\n",
err);
return -1;
}
set_sockaddr(sin, ic_netmask, 0);
- if ((err = ic_devinet_ioctl(SIOCSIFNETMASK, &ir)) < 0) {
+ if ((err = devinet_ioctl(&init_net, SIOCSIFNETMASK, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface netmask (%d)\n",
err);
return -1;
}
set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0);
- if ((err = ic_devinet_ioctl(SIOCSIFBRDADDR, &ir)) < 0) {
+ if ((err = devinet_ioctl(&init_net, SIOCSIFBRDADDR, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface broadcast address (%d)\n",
err);
return -1;
@@ -397,11 +364,11 @@ static int __init ic_setup_if(void)
* out, we'll try to muddle along.
*/
if (ic_dev_mtu != 0) {
- strcpy(ir.ifr_name, ic_dev->dev->name);
- ir.ifr_mtu = ic_dev_mtu;
- if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0)
+ rtnl_lock();
+ if ((err = dev_set_mtu(ic_dev->dev, ic_dev_mtu)) < 0)
pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n",
ic_dev_mtu, err);
+ rtnl_unlock();
}
return 0;
}
@@ -423,7 +390,7 @@ static int __init ic_setup_routes(void)
set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0);
set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0);
rm.rt_flags = RTF_UP | RTF_GATEWAY;
- if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) {
+ if ((err = ip_rt_ioctl(&init_net, SIOCADDRT, &rm)) < 0) {
pr_err("IP-Config: Cannot add default route (%d)\n",
err);
return -1;
@@ -1322,7 +1289,6 @@ static int pnp_seq_open(struct inode *indoe, struct file *file)
}
static const struct file_operations pnp_seq_fops = {
- .owner = THIS_MODULE,
.open = pnp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index fd5f19c988e4..b05689bbba31 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -3022,7 +3022,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
const char *name = vif->dev ? vif->dev->name : "none";
seq_printf(seq,
- "%2zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
+ "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
vif - mrt->vif_table,
name, vif->bytes_in, vif->pkt_in,
vif->bytes_out, vif->pkt_out,
@@ -3045,7 +3045,6 @@ static int ipmr_vif_open(struct inode *inode, struct file *file)
}
static const struct file_operations ipmr_vif_fops = {
- .owner = THIS_MODULE,
.open = ipmr_vif_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -3198,7 +3197,6 @@ static int ipmr_mfc_open(struct inode *inode, struct file *file)
}
static const struct file_operations ipmr_mfc_fops = {
- .owner = THIS_MODULE,
.open = ipmr_mfc_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index c0cc6aa8cfaa..e6774ccb7731 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -80,35 +80,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
}
EXPORT_SYMBOL(ip_route_me_harder);
-/*
- * Extra routing may needed on local out, as the QUEUE target never
- * returns control to the table.
- */
-
-struct ip_rt_info {
- __be32 daddr;
- __be32 saddr;
- u_int8_t tos;
- u_int32_t mark;
-};
-
-static void nf_ip_saveroute(const struct sk_buff *skb,
- struct nf_queue_entry *entry)
-{
- struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
- if (entry->state.hook == NF_INET_LOCAL_OUT) {
- const struct iphdr *iph = ip_hdr(skb);
-
- rt_info->tos = iph->tos;
- rt_info->daddr = iph->daddr;
- rt_info->saddr = iph->saddr;
- rt_info->mark = skb->mark;
- }
-}
-
-static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
- const struct nf_queue_entry *entry)
+int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
{
const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -119,10 +91,12 @@ static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
skb->mark == rt_info->mark &&
iph->daddr == rt_info->daddr &&
iph->saddr == rt_info->saddr))
- return ip_route_me_harder(net, skb, RTN_UNSPEC);
+ return ip_route_me_harder(entry->state.net, skb,
+ RTN_UNSPEC);
}
return 0;
}
+EXPORT_SYMBOL_GPL(nf_ip_reroute);
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
@@ -155,9 +129,9 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
}
EXPORT_SYMBOL(nf_ip_checksum);
-static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, unsigned int len,
- u_int8_t protocol)
+__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol)
{
const struct iphdr *iph = ip_hdr(skb);
__sum16 csum = 0;
@@ -175,9 +149,10 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
}
return csum;
}
+EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
-static int nf_ip_route(struct net *net, struct dst_entry **dst,
- struct flowi *fl, bool strict __always_unused)
+int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict __always_unused)
{
struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
if (IS_ERR(rt))
@@ -185,19 +160,4 @@ static int nf_ip_route(struct net *net, struct dst_entry **dst,
*dst = &rt->dst;
return 0;
}
-
-static const struct nf_afinfo nf_ip_afinfo = {
- .family = AF_INET,
- .checksum = nf_ip_checksum,
- .checksum_partial = nf_ip_checksum_partial,
- .route = nf_ip_route,
- .saveroute = nf_ip_saveroute,
- .reroute = nf_ip_reroute,
- .route_key_size = sizeof(struct ip_rt_info),
-};
-
-static int __init ipv4_netfilter_init(void)
-{
- return nf_register_afinfo(&nf_ip_afinfo);
-}
-subsys_initcall(ipv4_netfilter_init);
+EXPORT_SYMBOL_GPL(nf_ip_route);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index c11eb1744ab1..5f52236780b4 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -72,11 +72,21 @@ endif # NF_TABLES_IPV4
config NF_TABLES_ARP
tristate "ARP nf_tables support"
+ select NETFILTER_FAMILY_ARP
help
This option enables the ARP support for nf_tables.
endif # NF_TABLES
+config NF_FLOW_TABLE_IPV4
+ tristate "Netfilter flow table IPv4 module"
+ depends on NF_CONNTRACK && NF_TABLES
+ select NF_FLOW_TABLE
+ help
+ This option adds the flow table IPv4 support.
+
+ To compile it as a module, choose M here.
+
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
@@ -148,6 +158,7 @@ config NF_NAT_SNMP_BASIC
depends on NF_CONNTRACK_SNMP
depends on NETFILTER_ADVANCED
default NF_NAT && NF_CONNTRACK_SNMP
+ select ASN1
---help---
This module implements an Application Layer Gateway (ALG) for
@@ -333,6 +344,7 @@ config IP_NF_TARGET_CLUSTERIP
depends on NF_CONNTRACK_IPV4
depends on NETFILTER_ADVANCED
select NF_CONNTRACK_MARK
+ select NETFILTER_FAMILY_ARP
help
The CLUSTERIP target allows you to build load-balancing clusters of
network servers without having a dedicated load-balancing
@@ -392,6 +404,7 @@ endif # IP_NF_IPTABLES
config IP_NF_ARPTABLES
tristate "ARP tables support"
select NETFILTER_XTABLES
+ select NETFILTER_FAMILY_ARP
depends on NETFILTER_ADVANCED
help
arptables is a general, extensible packet identification framework.
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index adcdae358365..2dad20eefd26 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -27,9 +27,15 @@ obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o
# NAT helpers (nf_conntrack)
obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
+
+nf_nat_snmp_basic-y := nf_nat_snmp_basic-asn1.o nf_nat_snmp_basic_main.o
+nf_nat_snmp_basic-y : nf_nat_snmp_basic-asn1.h nf_nat_snmp_basic-asn1.c
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
+clean-files := nf_nat_snmp_basic-asn1.c nf_nat_snmp_basic-asn1.h
+
obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
+
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
@@ -43,6 +49,9 @@ obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
+# flow table support
+obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
+
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f88221aebc9d..4ffe302f9b82 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -202,13 +202,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
local_bh_disable();
addend = xt_write_recseq_begin();
- private = table->private;
+ private = READ_ONCE(table->private); /* Address dependency. */
cpu = smp_processor_id();
- /*
- * Ensure we load private-> members after we've fetched the base
- * pointer.
- */
- smp_read_barrier_depends();
table_base = private->entries;
jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
@@ -373,7 +368,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
- e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -811,9 +805,8 @@ static int get_info(struct net *net, void __user *user,
if (compat)
xt_compat_lock(NFPROTO_ARP);
#endif
- t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
- "arptable_%s", name);
- if (t) {
+ t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
+ if (!IS_ERR(t)) {
struct arpt_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -842,7 +835,7 @@ static int get_info(struct net *net, void __user *user,
xt_table_unlock(t);
module_put(t->me);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_unlock(NFPROTO_ARP);
@@ -867,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
if (get.size == private->size)
@@ -879,7 +872,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
return ret;
}
@@ -904,10 +897,9 @@ static int __do_replace(struct net *net, const char *name,
goto out;
}
- t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
- "arptable_%s", name);
- if (!t) {
- ret = -ENOENT;
+ t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free_newinfo_counters_untrans;
}
@@ -1021,8 +1013,8 @@ static int do_add_counters(struct net *net, const void __user *user,
return PTR_ERR(paddc);
t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name);
- if (!t) {
- ret = -ENOENT;
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free;
}
@@ -1409,7 +1401,7 @@ static int compat_get_entries(struct net *net,
xt_compat_lock(NFPROTO_ARP);
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
@@ -1424,7 +1416,7 @@ static int compat_get_entries(struct net *net,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
xt_compat_unlock(NFPROTO_ARP);
return ret;
@@ -1659,7 +1651,6 @@ static int __init arp_tables_init(void)
if (ret < 0)
goto err4;
- pr_info("arp_tables: (C) 2002 David S. Miller\n");
return 0;
err4:
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4cbe5e80f3bf..9a71f3149507 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -260,13 +260,8 @@ ipt_do_table(struct sk_buff *skb,
WARN_ON(!(table->valid_hooks & (1 << hook)));
local_bh_disable();
addend = xt_write_recseq_begin();
- private = table->private;
+ private = READ_ONCE(table->private); /* Address dependency. */
cpu = smp_processor_id();
- /*
- * Ensure we load private-> members after we've fetched the base
- * pointer.
- */
- smp_read_barrier_depends();
table_base = private->entries;
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
@@ -439,7 +434,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
- e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -974,9 +968,8 @@ static int get_info(struct net *net, void __user *user,
if (compat)
xt_compat_lock(AF_INET);
#endif
- t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
- "iptable_%s", name);
- if (t) {
+ t = xt_request_find_table_lock(net, AF_INET, name);
+ if (!IS_ERR(t)) {
struct ipt_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -1006,7 +999,7 @@ static int get_info(struct net *net, void __user *user,
xt_table_unlock(t);
module_put(t->me);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_unlock(AF_INET);
@@ -1031,7 +1024,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
@@ -1042,7 +1035,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
return ret;
}
@@ -1065,10 +1058,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
goto out;
}
- t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
- "iptable_%s", name);
- if (!t) {
- ret = -ENOENT;
+ t = xt_request_find_table_lock(net, AF_INET, name);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free_newinfo_counters_untrans;
}
@@ -1182,8 +1174,8 @@ do_add_counters(struct net *net, const void __user *user,
return PTR_ERR(paddc);
t = xt_find_table_lock(net, AF_INET, tmp.name);
- if (!t) {
- ret = -ENOENT;
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free;
}
@@ -1626,7 +1618,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
xt_compat_lock(AF_INET);
t = xt_find_table_lock(net, AF_INET, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
ret = compat_table_info(private, &info);
@@ -1640,7 +1632,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
xt_compat_unlock(AF_INET);
return ret;
@@ -1942,7 +1934,6 @@ static int __init ip_tables_init(void)
if (ret < 0)
goto err5;
- pr_info("(C) 2000-2006 Netfilter Core Team\n");
return 0;
err5:
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 17b4ca562944..3a84a60f6b39 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -431,7 +431,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
const struct ipt_entry *e = par->entryinfo;
struct clusterip_config *config;
- int ret;
+ int ret, i;
if (par->nft_compat) {
pr_err("cannot use CLUSTERIP target from nftables compat\n");
@@ -450,8 +450,18 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
pr_info("Please specify destination IP\n");
return -EINVAL;
}
-
- /* FIXME: further sanity checks */
+ if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) {
+ pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes);
+ return -EINVAL;
+ }
+ for (i = 0; i < cipinfo->num_local_nodes; i++) {
+ if (cipinfo->local_nodes[i] - 1 >=
+ sizeof(config->local_nodes) * 8) {
+ pr_info("bad local_nodes[%d] %u\n",
+ i, cipinfo->local_nodes[i]);
+ return -EINVAL;
+ }
+ }
config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
if (!config) {
@@ -776,7 +786,6 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
}
static const struct file_operations clusterip_proc_fops = {
- .owner = THIS_MODULE,
.open = clusterip_proc_open,
.read = seq_read,
.write = clusterip_proc_write,
@@ -813,12 +822,13 @@ static int clusterip_net_init(struct net *net)
static void clusterip_net_exit(struct net *net)
{
-#ifdef CONFIG_PROC_FS
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+#ifdef CONFIG_PROC_FS
proc_remove(cn->procdir);
cn->procdir = NULL;
#endif
nf_unregister_net_hook(net, &cip_arp_ops);
+ WARN_ON_ONCE(!list_empty(&cn->configs));
}
static struct pernet_operations clusterip_net_ops = {
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 7667f223d7f8..9ac92ea7b93c 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -38,12 +38,6 @@ static unsigned int
iptable_filter_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (state->hook == NF_INET_LOCAL_OUT &&
- (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)))
- /* root is playing with raw sockets. */
- return NF_ACCEPT;
-
return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
}
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index aebdb337fd7e..dea138ca8925 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -49,11 +49,6 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
u_int32_t mark;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
/* Save things which could affect route */
mark = skb->mark;
iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index a1a07b338ccf..0f7255cc65ee 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -72,6 +72,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
.hook = iptable_nat_ipv4_in,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_NAT_DST,
},
@@ -79,6 +80,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
.hook = iptable_nat_ipv4_out,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC,
},
@@ -86,6 +88,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
.hook = iptable_nat_ipv4_local_fn,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST,
},
@@ -93,6 +96,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
.hook = iptable_nat_ipv4_fn,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC,
},
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 2642ecd2645c..960625aabf04 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/slab.h>
@@ -12,6 +13,10 @@
static int __net_init iptable_raw_table_init(struct net *net);
+static bool raw_before_defrag __read_mostly;
+MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
+module_param(raw_before_defrag, bool, 0000);
+
static const struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
@@ -21,17 +26,20 @@ static const struct xt_table packet_raw = {
.table_init = iptable_raw_table_init,
};
+static const struct xt_table packet_raw_before_defrag = {
+ .name = "raw",
+ .valid_hooks = RAW_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG,
+ .table_init = iptable_raw_table_init,
+};
+
/* The work comes in here from netfilter.c. */
static unsigned int
iptable_raw_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (state->hook == NF_INET_LOCAL_OUT &&
- (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)))
- /* root is playing with raw sockets. */
- return NF_ACCEPT;
-
return ipt_do_table(skb, state, state->net->ipv4.iptable_raw);
}
@@ -40,15 +48,19 @@ static struct nf_hook_ops *rawtable_ops __read_mostly;
static int __net_init iptable_raw_table_init(struct net *net)
{
struct ipt_replace *repl;
+ const struct xt_table *table = &packet_raw;
int ret;
+ if (raw_before_defrag)
+ table = &packet_raw_before_defrag;
+
if (net->ipv4.iptable_raw)
return 0;
- repl = ipt_alloc_initial_table(&packet_raw);
+ repl = ipt_alloc_initial_table(table);
if (repl == NULL)
return -ENOMEM;
- ret = ipt_register_table(net, &packet_raw, repl, rawtable_ops,
+ ret = ipt_register_table(net, table, repl, rawtable_ops,
&net->ipv4.iptable_raw);
kfree(repl);
return ret;
@@ -69,8 +81,15 @@ static struct pernet_operations iptable_raw_net_ops = {
static int __init iptable_raw_init(void)
{
int ret;
+ const struct xt_table *table = &packet_raw;
+
+ if (raw_before_defrag) {
+ table = &packet_raw_before_defrag;
+
+ pr_info("Enabling raw table before defrag\n");
+ }
- rawtable_ops = xt_hook_ops_alloc(&packet_raw, iptable_raw_hook);
+ rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook);
if (IS_ERR(rawtable_ops))
return PTR_ERR(rawtable_ops);
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index ff226596e4b5..e5379fe57b64 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -43,12 +43,6 @@ static unsigned int
iptable_security_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (state->hook == NF_INET_LOCAL_OUT &&
- (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)))
- /* Somebody is playing with raw sockets. */
- return NF_ACCEPT;
-
return ipt_do_table(skb, state, state->net->ipv4.iptable_security);
}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 89af9d88ca21..b50721d9d30e 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -154,11 +154,6 @@ static unsigned int ipv4_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
return NF_ACCEPT;
@@ -218,15 +213,19 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
struct nf_conntrack_tuple tuple;
memset(&tuple, 0, sizeof(tuple));
+
+ lock_sock(sk);
tuple.src.u3.ip = inet->inet_rcv_saddr;
tuple.src.u.tcp.port = inet->inet_sport;
tuple.dst.u3.ip = inet->inet_daddr;
tuple.dst.u.tcp.port = inet->inet_dport;
tuple.src.l3num = PF_INET;
tuple.dst.protonum = sk->sk_protocol;
+ release_sock(sk);
/* We only do TCP and SCTP at the moment: is there a better way? */
- if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) {
+ if (tuple.dst.protonum != IPPROTO_TCP &&
+ tuple.dst.protonum != IPPROTO_SCTP) {
pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
return -ENOPROTOOPT;
}
@@ -368,7 +367,7 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("ip_conntrack");
MODULE_LICENSE("GPL");
-static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
+static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = {
&nf_conntrack_l4proto_tcp4,
&nf_conntrack_l4proto_udp4,
&nf_conntrack_l4proto_icmp,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 1849fedd9b81..5c15beafa711 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -22,7 +22,7 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_log.h>
-static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ;
+static const unsigned int nf_ct_icmp_timeout = 30*HZ;
static inline struct nf_icmp_net *icmp_pernet(struct net *net)
{
@@ -351,7 +351,7 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.icmp.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_ICMP,
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 37fe1616ca0b..a0d3ad60a411 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -78,6 +78,8 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb)))
return NF_ACCEPT;
#endif
+ if (skb->_nfct == IP_CT_UNTRACKED)
+ return NF_ACCEPT;
#endif
/* Gather fragments. */
if (ip_is_fragment(ip_hdr(skb))) {
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
new file mode 100644
index 000000000000..b2d01eb25f2c
--- /dev/null
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -0,0 +1,284 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace4(&udph->check, skb, addr,
+ new_addr, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, __be32 addr,
+ __be32 new_addr)
+{
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ struct iphdr *iph, unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ __be32 addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = iph->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = iph->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+ csum_replace4(&iph->check, addr, new_addr);
+
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ struct iphdr *iph, unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ __be32 addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = iph->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = iph->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ unsigned int thoff = iph->ihl * 4;
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
+ (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+ return -1;
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
+ (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+ return -1;
+
+ return 0;
+}
+
+static bool ip_has_options(unsigned int thoff)
+{
+ return thoff != sizeof(struct iphdr);
+}
+
+static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ unsigned int thoff;
+ struct iphdr *iph;
+
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ return -1;
+
+ iph = ip_hdr(skb);
+ thoff = iph->ihl * 4;
+
+ if (ip_is_fragment(iph) ||
+ unlikely(ip_has_options(thoff)))
+ return -1;
+
+ if (iph->protocol != IPPROTO_TCP &&
+ iph->protocol != IPPROTO_UDP)
+ return -1;
+
+ thoff = iph->ihl * 4;
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return -1;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v4.s_addr = iph->saddr;
+ tuple->dst_v4.s_addr = iph->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET;
+ tuple->l4proto = iph->protocol;
+ tuple->iifidx = dev->ifindex;
+
+ return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ return false;
+
+ return true;
+}
+
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
+{
+ u32 mtu;
+
+ mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
+ if (__nf_flow_exceeds_mtu(skb, mtu))
+ return true;
+
+ return false;
+}
+
+unsigned int
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ const struct rtable *rt;
+ struct iphdr *iph;
+ __be32 nexthop;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+ if (!outdev)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+ rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+ if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+ return NF_ACCEPT;
+
+ if (skb_try_make_writable(skb, sizeof(*iph)))
+ return NF_DROP;
+
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+ nf_flow_nat_ip(flow, skb, dir) < 0)
+ return NF_DROP;
+
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ iph = ip_hdr(skb);
+ ip_decrease_ttl(iph);
+
+ skb->dev = outdev;
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+
+ return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
+
+static struct nf_flowtable_type flowtable_ipv4 = {
+ .family = NFPROTO_IPV4,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_ip_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_ipv4_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_ipv4);
+
+ return 0;
+}
+
+static void __exit nf_flow_ipv4_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_ipv4);
+}
+
+module_init(nf_flow_ipv4_module_init);
+module_exit(nf_flow_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 0443ca4120b0..f7ff6a364d7b 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -356,11 +356,6 @@ nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
#endif
unsigned int ret;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
@@ -396,11 +391,6 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
unsigned int ret;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1
new file mode 100644
index 000000000000..24b73268f362
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1
@@ -0,0 +1,177 @@
+Message ::=
+ SEQUENCE {
+ version
+ INTEGER ({snmp_version}),
+
+ community
+ OCTET STRING,
+
+ pdu
+ PDUs
+ }
+
+
+ObjectName ::=
+ OBJECT IDENTIFIER
+
+ObjectSyntax ::=
+ CHOICE {
+ simple
+ SimpleSyntax,
+
+ application-wide
+ ApplicationSyntax
+ }
+
+SimpleSyntax ::=
+ CHOICE {
+ integer-value
+ INTEGER,
+
+ string-value
+ OCTET STRING,
+
+ objectID-value
+ OBJECT IDENTIFIER
+ }
+
+ApplicationSyntax ::=
+ CHOICE {
+ ipAddress-value
+ IpAddress,
+
+ counter-value
+ Counter32,
+
+ timeticks-value
+ TimeTicks,
+
+ arbitrary-value
+ Opaque,
+
+ big-counter-value
+ Counter64,
+
+ unsigned-integer-value
+ Unsigned32
+ }
+
+IpAddress ::=
+ [APPLICATION 0]
+ IMPLICIT OCTET STRING OPTIONAL ({snmp_helper})
+
+Counter32 ::=
+ [APPLICATION 1]
+ IMPLICIT INTEGER OPTIONAL
+
+Unsigned32 ::=
+ [APPLICATION 2]
+ IMPLICIT INTEGER OPTIONAL
+
+Gauge32 ::= Unsigned32 OPTIONAL
+
+TimeTicks ::=
+ [APPLICATION 3]
+ IMPLICIT INTEGER OPTIONAL
+
+Opaque ::=
+ [APPLICATION 4]
+ IMPLICIT OCTET STRING OPTIONAL
+
+Counter64 ::=
+ [APPLICATION 6]
+ IMPLICIT INTEGER OPTIONAL
+
+PDUs ::=
+ CHOICE {
+ get-request
+ GetRequest-PDU,
+
+ get-next-request
+ GetNextRequest-PDU,
+
+ get-bulk-request
+ GetBulkRequest-PDU,
+
+ response
+ Response-PDU,
+
+ set-request
+ SetRequest-PDU,
+
+ inform-request
+ InformRequest-PDU,
+
+ snmpV2-trap
+ SNMPv2-Trap-PDU,
+
+ report
+ Report-PDU
+ }
+
+GetRequest-PDU ::=
+ [0] IMPLICIT PDU OPTIONAL
+
+GetNextRequest-PDU ::=
+ [1] IMPLICIT PDU OPTIONAL
+
+Response-PDU ::=
+ [2] IMPLICIT PDU OPTIONAL
+
+SetRequest-PDU ::=
+ [3] IMPLICIT PDU OPTIONAL
+
+-- [4] is obsolete
+
+GetBulkRequest-PDU ::=
+ [5] IMPLICIT PDU OPTIONAL
+
+InformRequest-PDU ::=
+ [6] IMPLICIT PDU OPTIONAL
+
+SNMPv2-Trap-PDU ::=
+ [7] IMPLICIT PDU OPTIONAL
+
+Report-PDU ::=
+ [8] IMPLICIT PDU OPTIONAL
+
+PDU ::=
+ SEQUENCE {
+ request-id
+ INTEGER,
+
+ error-status
+ INTEGER,
+
+ error-index
+ INTEGER,
+
+ variable-bindings
+ VarBindList
+ }
+
+
+VarBind ::=
+ SEQUENCE {
+ name
+ ObjectName,
+
+ CHOICE {
+ value
+ ObjectSyntax,
+
+ unSpecified
+ NULL,
+
+ noSuchObject
+ [0] IMPLICIT NULL,
+
+ noSuchInstance
+ [1] IMPLICIT NULL,
+
+ endOfMibView
+ [2] IMPLICIT NULL
+ }
+}
+
+VarBindList ::= SEQUENCE OF VarBind
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
deleted file mode 100644
index d5b1e0b3f687..000000000000
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ /dev/null
@@ -1,1286 +0,0 @@
-/*
- * nf_nat_snmp_basic.c
- *
- * Basic SNMP Application Layer Gateway
- *
- * This IP NAT module is intended for use with SNMP network
- * discovery and monitoring applications where target networks use
- * conflicting private address realms.
- *
- * Static NAT is used to remap the networks from the view of the network
- * management system at the IP layer, and this module remaps some application
- * layer addresses to match.
- *
- * The simplest form of ALG is performed, where only tagged IP addresses
- * are modified. The module does not need to be MIB aware and only scans
- * messages at the ASN.1/BER level.
- *
- * Currently, only SNMPv1 and SNMPv2 are supported.
- *
- * More information on ALG and associated issues can be found in
- * RFC 2962
- *
- * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory
- * McLean & Jochen Friedrich, stripped down for use in the kernel.
- *
- * Copyright (c) 2000 RP Internet (www.rpi.net.au).
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * Author: James Morris <jmorris@intercode.com.au>
- *
- * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <net/checksum.h>
-#include <net/udp.h>
-
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <linux/netfilter/nf_conntrack_snmp.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
-MODULE_ALIAS("ip_nat_snmp_basic");
-
-#define SNMP_PORT 161
-#define SNMP_TRAP_PORT 162
-#define NOCT1(n) (*(u8 *)(n))
-
-static int debug;
-static DEFINE_SPINLOCK(snmp_lock);
-
-/*
- * Application layer address mapping mimics the NAT mapping, but
- * only for the first octet in this case (a more flexible system
- * can be implemented if needed).
- */
-struct oct1_map
-{
- u_int8_t from;
- u_int8_t to;
-};
-
-
-/*****************************************************************************
- *
- * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* Class */
-#define ASN1_UNI 0 /* Universal */
-#define ASN1_APL 1 /* Application */
-#define ASN1_CTX 2 /* Context */
-#define ASN1_PRV 3 /* Private */
-
-/* Tag */
-#define ASN1_EOC 0 /* End Of Contents */
-#define ASN1_BOL 1 /* Boolean */
-#define ASN1_INT 2 /* Integer */
-#define ASN1_BTS 3 /* Bit String */
-#define ASN1_OTS 4 /* Octet String */
-#define ASN1_NUL 5 /* Null */
-#define ASN1_OJI 6 /* Object Identifier */
-#define ASN1_OJD 7 /* Object Description */
-#define ASN1_EXT 8 /* External */
-#define ASN1_SEQ 16 /* Sequence */
-#define ASN1_SET 17 /* Set */
-#define ASN1_NUMSTR 18 /* Numerical String */
-#define ASN1_PRNSTR 19 /* Printable String */
-#define ASN1_TEXSTR 20 /* Teletext String */
-#define ASN1_VIDSTR 21 /* Video String */
-#define ASN1_IA5STR 22 /* IA5 String */
-#define ASN1_UNITIM 23 /* Universal Time */
-#define ASN1_GENTIM 24 /* General Time */
-#define ASN1_GRASTR 25 /* Graphical String */
-#define ASN1_VISSTR 26 /* Visible String */
-#define ASN1_GENSTR 27 /* General String */
-
-/* Primitive / Constructed methods*/
-#define ASN1_PRI 0 /* Primitive */
-#define ASN1_CON 1 /* Constructed */
-
-/*
- * Error codes.
- */
-#define ASN1_ERR_NOERROR 0
-#define ASN1_ERR_DEC_EMPTY 2
-#define ASN1_ERR_DEC_EOC_MISMATCH 3
-#define ASN1_ERR_DEC_LENGTH_MISMATCH 4
-#define ASN1_ERR_DEC_BADVALUE 5
-
-/*
- * ASN.1 context.
- */
-struct asn1_ctx
-{
- int error; /* Error condition */
- unsigned char *pointer; /* Octet just to be decoded */
- unsigned char *begin; /* First octet */
- unsigned char *end; /* Octet after last octet */
-};
-
-/*
- * Octet string (not null terminated)
- */
-struct asn1_octstr
-{
- unsigned char *data;
- unsigned int len;
-};
-
-static void asn1_open(struct asn1_ctx *ctx,
- unsigned char *buf,
- unsigned int len)
-{
- ctx->begin = buf;
- ctx->end = buf + len;
- ctx->pointer = buf;
- ctx->error = ASN1_ERR_NOERROR;
-}
-
-static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
-{
- if (ctx->pointer >= ctx->end) {
- ctx->error = ASN1_ERR_DEC_EMPTY;
- return 0;
- }
- *ch = *(ctx->pointer)++;
- return 1;
-}
-
-static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
-{
- unsigned char ch;
-
- *tag = 0;
-
- do
- {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
- *tag <<= 7;
- *tag |= ch & 0x7F;
- } while ((ch & 0x80) == 0x80);
- return 1;
-}
-
-static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
- unsigned int *cls,
- unsigned int *con,
- unsigned int *tag)
-{
- unsigned char ch;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *cls = (ch & 0xC0) >> 6;
- *con = (ch & 0x20) >> 5;
- *tag = (ch & 0x1F);
-
- if (*tag == 0x1F) {
- if (!asn1_tag_decode(ctx, tag))
- return 0;
- }
- return 1;
-}
-
-static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
- unsigned int *def,
- unsigned int *len)
-{
- unsigned char ch, cnt;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- if (ch == 0x80)
- *def = 0;
- else {
- *def = 1;
-
- if (ch < 0x80)
- *len = ch;
- else {
- cnt = ch & 0x7F;
- *len = 0;
-
- while (cnt > 0) {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
- *len <<= 8;
- *len |= ch;
- cnt--;
- }
- }
- }
-
- /* don't trust len bigger than ctx buffer */
- if (*len > ctx->end - ctx->pointer)
- return 0;
-
- return 1;
-}
-
-static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
- unsigned char **eoc,
- unsigned int *cls,
- unsigned int *con,
- unsigned int *tag)
-{
- unsigned int def, len;
-
- if (!asn1_id_decode(ctx, cls, con, tag))
- return 0;
-
- def = len = 0;
- if (!asn1_length_decode(ctx, &def, &len))
- return 0;
-
- /* primitive shall be definite, indefinite shall be constructed */
- if (*con == ASN1_PRI && !def)
- return 0;
-
- if (def)
- *eoc = ctx->pointer + len;
- else
- *eoc = NULL;
- return 1;
-}
-
-static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
- unsigned char ch;
-
- if (eoc == NULL) {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- if (ch != 0x00) {
- ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- if (ch != 0x00) {
- ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
- return 0;
- }
- return 1;
- } else {
- if (ctx->pointer != eoc) {
- ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
- return 0;
- }
- return 1;
- }
-}
-
-static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
- ctx->pointer = eoc;
- return 1;
-}
-
-static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- long *integer)
-{
- unsigned char ch;
- unsigned int len;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer = (signed char) ch;
- len = 1;
-
- while (ctx->pointer < eoc) {
- if (++len > sizeof (long)) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer <<= 8;
- *integer |= ch;
- }
- return 1;
-}
-
-static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned int *integer)
-{
- unsigned char ch;
- unsigned int len;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer = ch;
- if (ch == 0) len = 0;
- else len = 1;
-
- while (ctx->pointer < eoc) {
- if (++len > sizeof (unsigned int)) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer <<= 8;
- *integer |= ch;
- }
- return 1;
-}
-
-static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned long *integer)
-{
- unsigned char ch;
- unsigned int len;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer = ch;
- if (ch == 0) len = 0;
- else len = 1;
-
- while (ctx->pointer < eoc) {
- if (++len > sizeof (unsigned long)) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer <<= 8;
- *integer |= ch;
- }
- return 1;
-}
-
-static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned char **octets,
- unsigned int *len)
-{
- unsigned char *ptr;
-
- *len = 0;
-
- *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
- if (*octets == NULL)
- return 0;
-
- ptr = *octets;
- while (ctx->pointer < eoc) {
- if (!asn1_octet_decode(ctx, ptr++)) {
- kfree(*octets);
- *octets = NULL;
- return 0;
- }
- (*len)++;
- }
- return 1;
-}
-
-static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
- unsigned long *subid)
-{
- unsigned char ch;
-
- *subid = 0;
-
- do {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *subid <<= 7;
- *subid |= ch & 0x7F;
- } while ((ch & 0x80) == 0x80);
- return 1;
-}
-
-static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned long **oid,
- unsigned int *len)
-{
- unsigned long subid;
- unsigned long *optr;
- size_t size;
-
- size = eoc - ctx->pointer + 1;
-
- /* first subid actually encodes first two subids */
- if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
- return 0;
-
- *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
- if (*oid == NULL)
- return 0;
-
- optr = *oid;
-
- if (!asn1_subid_decode(ctx, &subid)) {
- kfree(*oid);
- *oid = NULL;
- return 0;
- }
-
- if (subid < 40) {
- optr[0] = 0;
- optr[1] = subid;
- } else if (subid < 80) {
- optr[0] = 1;
- optr[1] = subid - 40;
- } else {
- optr[0] = 2;
- optr[1] = subid - 80;
- }
-
- *len = 2;
- optr += 2;
-
- while (ctx->pointer < eoc) {
- if (++(*len) > size) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- kfree(*oid);
- *oid = NULL;
- return 0;
- }
-
- if (!asn1_subid_decode(ctx, optr++)) {
- kfree(*oid);
- *oid = NULL;
- return 0;
- }
- }
- return 1;
-}
-
-/*****************************************************************************
- *
- * SNMP decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* SNMP Versions */
-#define SNMP_V1 0
-#define SNMP_V2C 1
-#define SNMP_V2 2
-#define SNMP_V3 3
-
-/* Default Sizes */
-#define SNMP_SIZE_COMM 256
-#define SNMP_SIZE_OBJECTID 128
-#define SNMP_SIZE_BUFCHR 256
-#define SNMP_SIZE_BUFINT 128
-#define SNMP_SIZE_SMALLOBJECTID 16
-
-/* Requests */
-#define SNMP_PDU_GET 0
-#define SNMP_PDU_NEXT 1
-#define SNMP_PDU_RESPONSE 2
-#define SNMP_PDU_SET 3
-#define SNMP_PDU_TRAP1 4
-#define SNMP_PDU_BULK 5
-#define SNMP_PDU_INFORM 6
-#define SNMP_PDU_TRAP2 7
-
-/* Errors */
-#define SNMP_NOERROR 0
-#define SNMP_TOOBIG 1
-#define SNMP_NOSUCHNAME 2
-#define SNMP_BADVALUE 3
-#define SNMP_READONLY 4
-#define SNMP_GENERROR 5
-#define SNMP_NOACCESS 6
-#define SNMP_WRONGTYPE 7
-#define SNMP_WRONGLENGTH 8
-#define SNMP_WRONGENCODING 9
-#define SNMP_WRONGVALUE 10
-#define SNMP_NOCREATION 11
-#define SNMP_INCONSISTENTVALUE 12
-#define SNMP_RESOURCEUNAVAILABLE 13
-#define SNMP_COMMITFAILED 14
-#define SNMP_UNDOFAILED 15
-#define SNMP_AUTHORIZATIONERROR 16
-#define SNMP_NOTWRITABLE 17
-#define SNMP_INCONSISTENTNAME 18
-
-/* General SNMP V1 Traps */
-#define SNMP_TRAP_COLDSTART 0
-#define SNMP_TRAP_WARMSTART 1
-#define SNMP_TRAP_LINKDOWN 2
-#define SNMP_TRAP_LINKUP 3
-#define SNMP_TRAP_AUTFAILURE 4
-#define SNMP_TRAP_EQPNEIGHBORLOSS 5
-#define SNMP_TRAP_ENTSPECIFIC 6
-
-/* SNMPv1 Types */
-#define SNMP_NULL 0
-#define SNMP_INTEGER 1 /* l */
-#define SNMP_OCTETSTR 2 /* c */
-#define SNMP_DISPLAYSTR 2 /* c */
-#define SNMP_OBJECTID 3 /* ul */
-#define SNMP_IPADDR 4 /* uc */
-#define SNMP_COUNTER 5 /* ul */
-#define SNMP_GAUGE 6 /* ul */
-#define SNMP_TIMETICKS 7 /* ul */
-#define SNMP_OPAQUE 8 /* c */
-
-/* Additional SNMPv2 Types */
-#define SNMP_UINTEGER 5 /* ul */
-#define SNMP_BITSTR 9 /* uc */
-#define SNMP_NSAP 10 /* uc */
-#define SNMP_COUNTER64 11 /* ul */
-#define SNMP_NOSUCHOBJECT 12
-#define SNMP_NOSUCHINSTANCE 13
-#define SNMP_ENDOFMIBVIEW 14
-
-union snmp_syntax
-{
- unsigned char uc[0]; /* 8 bit unsigned */
- char c[0]; /* 8 bit signed */
- unsigned long ul[0]; /* 32 bit unsigned */
- long l[0]; /* 32 bit signed */
-};
-
-struct snmp_object
-{
- unsigned long *id;
- unsigned int id_len;
- unsigned short type;
- unsigned int syntax_len;
- union snmp_syntax syntax;
-};
-
-struct snmp_request
-{
- unsigned long id;
- unsigned int error_status;
- unsigned int error_index;
-};
-
-struct snmp_v1_trap
-{
- unsigned long *id;
- unsigned int id_len;
- unsigned long ip_address; /* pointer */
- unsigned int general;
- unsigned int specific;
- unsigned long time;
-};
-
-/* SNMP types */
-#define SNMP_IPA 0
-#define SNMP_CNT 1
-#define SNMP_GGE 2
-#define SNMP_TIT 3
-#define SNMP_OPQ 4
-#define SNMP_C64 6
-
-/* SNMP errors */
-#define SERR_NSO 0
-#define SERR_NSI 1
-#define SERR_EOM 2
-
-static inline void mangle_address(unsigned char *begin,
- unsigned char *addr,
- const struct oct1_map *map,
- __sum16 *check);
-struct snmp_cnv
-{
- unsigned int class;
- unsigned int tag;
- int syntax;
-};
-
-static const struct snmp_cnv snmp_conv[] = {
- {ASN1_UNI, ASN1_NUL, SNMP_NULL},
- {ASN1_UNI, ASN1_INT, SNMP_INTEGER},
- {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
- {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
- {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
- {ASN1_APL, SNMP_IPA, SNMP_IPADDR},
- {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */
- {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */
- {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
- {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
-
- /* SNMPv2 data types and errors */
- {ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
- {ASN1_APL, SNMP_C64, SNMP_COUNTER64},
- {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
- {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
- {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
- {0, 0, -1}
-};
-
-static unsigned char snmp_tag_cls2syntax(unsigned int tag,
- unsigned int cls,
- unsigned short *syntax)
-{
- const struct snmp_cnv *cnv;
-
- cnv = snmp_conv;
-
- while (cnv->syntax != -1) {
- if (cnv->tag == tag && cnv->class == cls) {
- *syntax = cnv->syntax;
- return 1;
- }
- cnv++;
- }
- return 0;
-}
-
-static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
- struct snmp_object **obj)
-{
- unsigned int cls, con, tag, len, idlen;
- unsigned short type;
- unsigned char *eoc, *end, *p;
- unsigned long *lp, *id;
- unsigned long ul;
- long l;
-
- *obj = NULL;
- id = NULL;
-
- if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
- return 0;
-
- if (!asn1_oid_decode(ctx, end, &id, &idlen))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
- kfree(id);
- return 0;
- }
-
- if (con != ASN1_PRI) {
- kfree(id);
- return 0;
- }
-
- type = 0;
- if (!snmp_tag_cls2syntax(tag, cls, &type)) {
- kfree(id);
- return 0;
- }
-
- l = 0;
- switch (type) {
- case SNMP_INTEGER:
- len = sizeof(long);
- if (!asn1_long_decode(ctx, end, &l)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- return 0;
- }
- (*obj)->syntax.l[0] = l;
- break;
- case SNMP_OCTETSTR:
- case SNMP_OPAQUE:
- if (!asn1_octets_decode(ctx, end, &p, &len)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(p);
- kfree(id);
- return 0;
- }
- memcpy((*obj)->syntax.c, p, len);
- kfree(p);
- break;
- case SNMP_NULL:
- case SNMP_NOSUCHOBJECT:
- case SNMP_NOSUCHINSTANCE:
- case SNMP_ENDOFMIBVIEW:
- len = 0;
- *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- return 0;
- }
- if (!asn1_null_decode(ctx, end)) {
- kfree(id);
- kfree(*obj);
- *obj = NULL;
- return 0;
- }
- break;
- case SNMP_OBJECTID:
- if (!asn1_oid_decode(ctx, end, &lp, &len)) {
- kfree(id);
- return 0;
- }
- len *= sizeof(unsigned long);
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(lp);
- kfree(id);
- return 0;
- }
- memcpy((*obj)->syntax.ul, lp, len);
- kfree(lp);
- break;
- case SNMP_IPADDR:
- if (!asn1_octets_decode(ctx, end, &p, &len)) {
- kfree(id);
- return 0;
- }
- if (len != 4) {
- kfree(p);
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(p);
- kfree(id);
- return 0;
- }
- memcpy((*obj)->syntax.uc, p, len);
- kfree(p);
- break;
- case SNMP_COUNTER:
- case SNMP_GAUGE:
- case SNMP_TIMETICKS:
- len = sizeof(unsigned long);
- if (!asn1_ulong_decode(ctx, end, &ul)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- return 0;
- }
- (*obj)->syntax.ul[0] = ul;
- break;
- default:
- kfree(id);
- return 0;
- }
-
- (*obj)->syntax_len = len;
- (*obj)->type = type;
- (*obj)->id = id;
- (*obj)->id_len = idlen;
-
- if (!asn1_eoc_decode(ctx, eoc)) {
- kfree(id);
- kfree(*obj);
- *obj = NULL;
- return 0;
- }
- return 1;
-}
-
-static unsigned char noinline_for_stack
-snmp_request_decode(struct asn1_ctx *ctx, struct snmp_request *request)
-{
- unsigned int cls, con, tag;
- unsigned char *end;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
-
- if (!asn1_ulong_decode(ctx, end, &request->id))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
-
- if (!asn1_uint_decode(ctx, end, &request->error_status))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
-
- if (!asn1_uint_decode(ctx, end, &request->error_index))
- return 0;
-
- return 1;
-}
-
-/*
- * Fast checksum update for possibly oddly-aligned UDP byte, from the
- * code example in the draft.
- */
-static void fast_csum(__sum16 *csum,
- const unsigned char *optr,
- const unsigned char *nptr,
- int offset)
-{
- unsigned char s[4];
-
- if (offset & 1) {
- s[0] = ~0;
- s[1] = ~*optr;
- s[2] = 0;
- s[3] = *nptr;
- } else {
- s[0] = ~*optr;
- s[1] = ~0;
- s[2] = *nptr;
- s[3] = 0;
- }
-
- *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
-}
-
-/*
- * Mangle IP address.
- * - begin points to the start of the snmp messgae
- * - addr points to the start of the address
- */
-static inline void mangle_address(unsigned char *begin,
- unsigned char *addr,
- const struct oct1_map *map,
- __sum16 *check)
-{
- if (map->from == NOCT1(addr)) {
- u_int32_t old;
-
- if (debug)
- memcpy(&old, addr, sizeof(old));
-
- *addr = map->to;
-
- /* Update UDP checksum if being used */
- if (*check) {
- fast_csum(check,
- &map->from, &map->to, addr - begin);
-
- }
-
- if (debug)
- printk(KERN_DEBUG "bsalg: mapped %pI4 to %pI4\n",
- &old, addr);
- }
-}
-
-static unsigned char noinline_for_stack
-snmp_trap_decode(struct asn1_ctx *ctx, struct snmp_v1_trap *trap,
- const struct oct1_map *map,
- __sum16 *check)
-{
- unsigned int cls, con, tag, len;
- unsigned char *end;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
- return 0;
-
- if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_id_free;
-
- if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
- (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
- goto err_id_free;
-
- if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
- goto err_id_free;
-
- /* IPv4 only */
- if (len != 4)
- goto err_addr_free;
-
- mangle_address(ctx->begin, ctx->pointer - 4, map, check);
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_addr_free;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- goto err_addr_free;
-
- if (!asn1_uint_decode(ctx, end, &trap->general))
- goto err_addr_free;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_addr_free;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- goto err_addr_free;
-
- if (!asn1_uint_decode(ctx, end, &trap->specific))
- goto err_addr_free;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_addr_free;
-
- if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
- (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
- goto err_addr_free;
-
- if (!asn1_ulong_decode(ctx, end, &trap->time))
- goto err_addr_free;
-
- return 1;
-
-err_addr_free:
- kfree((unsigned long *)trap->ip_address);
-
-err_id_free:
- kfree(trap->id);
-
- return 0;
-}
-
-/*****************************************************************************
- *
- * Misc. routines
- *
- *****************************************************************************/
-
-/*
- * Parse and mangle SNMP message according to mapping.
- * (And this is the fucking 'basic' method).
- */
-static int snmp_parse_mangle(unsigned char *msg,
- u_int16_t len,
- const struct oct1_map *map,
- __sum16 *check)
-{
- unsigned char *eoc, *end;
- unsigned int cls, con, tag, vers, pdutype;
- struct asn1_ctx ctx;
- struct asn1_octstr comm;
- struct snmp_object *obj;
-
- if (debug > 1)
- print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1,
- msg, len, 0);
-
- asn1_open(&ctx, msg, len);
-
- /*
- * Start of SNMP message.
- */
- if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
- return 0;
- if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
- return 0;
-
- /*
- * Version 1 or 2 handled.
- */
- if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
- return 0;
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
- if (!asn1_uint_decode (&ctx, end, &vers))
- return 0;
- if (debug > 1)
- pr_debug("bsalg: snmp version: %u\n", vers + 1);
- if (vers > 1)
- return 1;
-
- /*
- * Community.
- */
- if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
- return 0;
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
- return 0;
- if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
- return 0;
- if (debug > 1) {
- unsigned int i;
-
- pr_debug("bsalg: community: ");
- for (i = 0; i < comm.len; i++)
- pr_cont("%c", comm.data[i]);
- pr_cont("\n");
- }
- kfree(comm.data);
-
- /*
- * PDU type
- */
- if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
- return 0;
- if (cls != ASN1_CTX || con != ASN1_CON)
- return 0;
- if (debug > 1) {
- static const unsigned char *const pdus[] = {
- [SNMP_PDU_GET] = "get",
- [SNMP_PDU_NEXT] = "get-next",
- [SNMP_PDU_RESPONSE] = "response",
- [SNMP_PDU_SET] = "set",
- [SNMP_PDU_TRAP1] = "trapv1",
- [SNMP_PDU_BULK] = "bulk",
- [SNMP_PDU_INFORM] = "inform",
- [SNMP_PDU_TRAP2] = "trapv2"
- };
-
- if (pdutype > SNMP_PDU_TRAP2)
- pr_debug("bsalg: bad pdu type %u\n", pdutype);
- else
- pr_debug("bsalg: pdu: %s\n", pdus[pdutype]);
- }
- if (pdutype != SNMP_PDU_RESPONSE &&
- pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
- return 1;
-
- /*
- * Request header or v1 trap
- */
- if (pdutype == SNMP_PDU_TRAP1) {
- struct snmp_v1_trap trap;
- unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
-
- if (ret) {
- kfree(trap.id);
- kfree((unsigned long *)trap.ip_address);
- } else
- return ret;
-
- } else {
- struct snmp_request req;
-
- if (!snmp_request_decode(&ctx, &req))
- return 0;
-
- if (debug > 1)
- pr_debug("bsalg: request: id=0x%lx error_status=%u "
- "error_index=%u\n", req.id, req.error_status,
- req.error_index);
- }
-
- /*
- * Loop through objects, look for IP addresses to mangle.
- */
- if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
- return 0;
-
- while (!asn1_eoc_decode(&ctx, eoc)) {
- unsigned int i;
-
- if (!snmp_object_decode(&ctx, &obj)) {
- if (obj) {
- kfree(obj->id);
- kfree(obj);
- }
- return 0;
- }
-
- if (debug > 1) {
- pr_debug("bsalg: object: ");
- for (i = 0; i < obj->id_len; i++) {
- if (i > 0)
- pr_cont(".");
- pr_cont("%lu", obj->id[i]);
- }
- pr_cont(": type=%u\n", obj->type);
-
- }
-
- if (obj->type == SNMP_IPADDR)
- mangle_address(ctx.begin, ctx.pointer - 4, map, check);
-
- kfree(obj->id);
- kfree(obj);
- }
-
- if (!asn1_eoc_decode(&ctx, eoc))
- return 0;
-
- return 1;
-}
-
-/*****************************************************************************
- *
- * NAT routines.
- *
- *****************************************************************************/
-
-/*
- * SNMP translation routine.
- */
-static int snmp_translate(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- struct sk_buff *skb)
-{
- struct iphdr *iph = ip_hdr(skb);
- struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
- u_int16_t udplen = ntohs(udph->len);
- u_int16_t paylen = udplen - sizeof(struct udphdr);
- int dir = CTINFO2DIR(ctinfo);
- struct oct1_map map;
-
- /*
- * Determine mappping for application layer addresses based
- * on NAT manipulations for the packet.
- */
- if (dir == IP_CT_DIR_ORIGINAL) {
- /* SNAT traps */
- map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
- map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
- } else {
- /* DNAT replies */
- map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip);
- map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip);
- }
-
- if (map.from == map.to)
- return NF_ACCEPT;
-
- if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
- paylen, &map, &udph->check)) {
- net_warn_ratelimited("bsalg: parser failed\n");
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-/* We don't actually set up expectations, just adjust internal IP
- * addresses if this is being NATted */
-static int help(struct sk_buff *skb, unsigned int protoff,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- int dir = CTINFO2DIR(ctinfo);
- unsigned int ret;
- const struct iphdr *iph = ip_hdr(skb);
- const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
-
- /* SNMP replies and originating SNMP traps get mangled */
- if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
- return NF_ACCEPT;
- if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
- return NF_ACCEPT;
-
- /* No NAT? */
- if (!(ct->status & IPS_NAT_MASK))
- return NF_ACCEPT;
-
- /*
- * Make sure the packet length is ok. So far, we were only guaranteed
- * to have a valid length IP header plus 8 bytes, which means we have
- * enough room for a UDP header. Just verify the UDP length field so we
- * can mess around with the payload.
- */
- if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
- net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n",
- &iph->saddr, &iph->daddr);
- return NF_DROP;
- }
-
- if (!skb_make_writable(skb, skb->len))
- return NF_DROP;
-
- spin_lock_bh(&snmp_lock);
- ret = snmp_translate(ct, ctinfo, skb);
- spin_unlock_bh(&snmp_lock);
- return ret;
-}
-
-static const struct nf_conntrack_expect_policy snmp_exp_policy = {
- .max_expected = 0,
- .timeout = 180,
-};
-
-static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
- .me = THIS_MODULE,
- .help = help,
- .expect_policy = &snmp_exp_policy,
- .name = "snmp_trap",
- .tuple.src.l3num = AF_INET,
- .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT),
- .tuple.dst.protonum = IPPROTO_UDP,
-};
-
-/*****************************************************************************
- *
- * Module stuff.
- *
- *****************************************************************************/
-
-static int __init nf_nat_snmp_basic_init(void)
-{
- BUG_ON(nf_nat_snmp_hook != NULL);
- RCU_INIT_POINTER(nf_nat_snmp_hook, help);
-
- return nf_conntrack_helper_register(&snmp_trap_helper);
-}
-
-static void __exit nf_nat_snmp_basic_fini(void)
-{
- RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
- synchronize_rcu();
- nf_conntrack_helper_unregister(&snmp_trap_helper);
-}
-
-module_init(nf_nat_snmp_basic_init);
-module_exit(nf_nat_snmp_basic_fini);
-
-module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
new file mode 100644
index 000000000000..b6e277093e7e
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
@@ -0,0 +1,235 @@
+/*
+ * nf_nat_snmp_basic.c
+ *
+ * Basic SNMP Application Layer Gateway
+ *
+ * This IP NAT module is intended for use with SNMP network
+ * discovery and monitoring applications where target networks use
+ * conflicting private address realms.
+ *
+ * Static NAT is used to remap the networks from the view of the network
+ * management system at the IP layer, and this module remaps some application
+ * layer addresses to match.
+ *
+ * The simplest form of ALG is performed, where only tagged IP addresses
+ * are modified. The module does not need to be MIB aware and only scans
+ * messages at the ASN.1/BER level.
+ *
+ * Currently, only SNMPv1 and SNMPv2 are supported.
+ *
+ * More information on ALG and associated issues can be found in
+ * RFC 2962
+ *
+ * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory
+ * McLean & Jochen Friedrich, stripped down for use in the kernel.
+ *
+ * Copyright (c) 2000 RP Internet (www.rpi.net.au).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: James Morris <jmorris@intercode.com.au>
+ *
+ * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
+#include "nf_nat_snmp_basic-asn1.h"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
+MODULE_ALIAS("ip_nat_snmp_basic");
+
+#define SNMP_PORT 161
+#define SNMP_TRAP_PORT 162
+
+static DEFINE_SPINLOCK(snmp_lock);
+
+struct snmp_ctx {
+ unsigned char *begin;
+ __sum16 *check;
+ __be32 from;
+ __be32 to;
+};
+
+static void fast_csum(struct snmp_ctx *ctx, unsigned char offset)
+{
+ unsigned char s[12] = {0,};
+ int size;
+
+ if (offset & 1) {
+ memcpy(&s[1], &ctx->from, 4);
+ memcpy(&s[7], &ctx->to, 4);
+ s[0] = ~0;
+ s[1] = ~s[1];
+ s[2] = ~s[2];
+ s[3] = ~s[3];
+ s[4] = ~s[4];
+ s[5] = ~0;
+ size = 12;
+ } else {
+ memcpy(&s[0], &ctx->from, 4);
+ memcpy(&s[4], &ctx->to, 4);
+ s[0] = ~s[0];
+ s[1] = ~s[1];
+ s[2] = ~s[2];
+ s[3] = ~s[3];
+ size = 8;
+ }
+ *ctx->check = csum_fold(csum_partial(s, size,
+ ~csum_unfold(*ctx->check)));
+}
+
+int snmp_version(void *context, size_t hdrlen, unsigned char tag,
+ const void *data, size_t datalen)
+{
+ if (*(unsigned char *)data > 1)
+ return -ENOTSUPP;
+ return 1;
+}
+
+int snmp_helper(void *context, size_t hdrlen, unsigned char tag,
+ const void *data, size_t datalen)
+{
+ struct snmp_ctx *ctx = (struct snmp_ctx *)context;
+ __be32 *pdata = (__be32 *)data;
+
+ if (*pdata == ctx->from) {
+ pr_debug("%s: %pI4 to %pI4\n", __func__,
+ (void *)&ctx->from, (void *)&ctx->to);
+
+ if (*ctx->check)
+ fast_csum(ctx, (unsigned char *)data - ctx->begin);
+ *pdata = ctx->to;
+ }
+
+ return 1;
+}
+
+static int snmp_translate(struct nf_conn *ct, int dir, struct sk_buff *skb)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
+ u16 datalen = ntohs(udph->len) - sizeof(struct udphdr);
+ char *data = (unsigned char *)udph + sizeof(struct udphdr);
+ struct snmp_ctx ctx;
+ int ret;
+
+ if (dir == IP_CT_DIR_ORIGINAL) {
+ ctx.from = ct->tuplehash[dir].tuple.src.u3.ip;
+ ctx.to = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ } else {
+ ctx.from = ct->tuplehash[!dir].tuple.src.u3.ip;
+ ctx.to = ct->tuplehash[dir].tuple.dst.u3.ip;
+ }
+
+ if (ctx.from == ctx.to)
+ return NF_ACCEPT;
+
+ ctx.begin = (unsigned char *)udph + sizeof(struct udphdr);
+ ctx.check = &udph->check;
+ ret = asn1_ber_decoder(&nf_nat_snmp_basic_decoder, &ctx, data, datalen);
+ if (ret < 0) {
+ nf_ct_helper_log(skb, ct, "parser failed\n");
+ return NF_DROP;
+ }
+
+ return NF_ACCEPT;
+}
+
+/* We don't actually set up expectations, just adjust internal IP
+ * addresses if this is being NATted
+ */
+static int help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ unsigned int ret;
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
+
+ /* SNMP replies and originating SNMP traps get mangled */
+ if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
+ return NF_ACCEPT;
+ if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ /* No NAT? */
+ if (!(ct->status & IPS_NAT_MASK))
+ return NF_ACCEPT;
+
+ /* Make sure the packet length is ok. So far, we were only guaranteed
+ * to have a valid length IP header plus 8 bytes, which means we have
+ * enough room for a UDP header. Just verify the UDP length field so we
+ * can mess around with the payload.
+ */
+ if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
+ nf_ct_helper_log(skb, ct, "dropping malformed packet\n");
+ return NF_DROP;
+ }
+
+ if (!skb_make_writable(skb, skb->len)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle packet");
+ return NF_DROP;
+ }
+
+ spin_lock_bh(&snmp_lock);
+ ret = snmp_translate(ct, dir, skb);
+ spin_unlock_bh(&snmp_lock);
+ return ret;
+}
+
+static const struct nf_conntrack_expect_policy snmp_exp_policy = {
+ .max_expected = 0,
+ .timeout = 180,
+};
+
+static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
+ .me = THIS_MODULE,
+ .help = help,
+ .expect_policy = &snmp_exp_policy,
+ .name = "snmp_trap",
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+};
+
+static int __init nf_nat_snmp_basic_init(void)
+{
+ BUG_ON(nf_nat_snmp_hook != NULL);
+ RCU_INIT_POINTER(nf_nat_snmp_hook, help);
+
+ return nf_conntrack_helper_register(&snmp_trap_helper);
+}
+
+static void __exit nf_nat_snmp_basic_fini(void)
+{
+ RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
+ synchronize_rcu();
+ nf_conntrack_helper_unregister(&snmp_trap_helper);
+}
+
+module_init(nf_nat_snmp_basic_init);
+module_exit(nf_nat_snmp_basic_fini);
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 4bbc273b45e8..036c074736b0 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -21,51 +21,12 @@ nft_do_chain_arp(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_unspec(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
-static struct nft_af_info nft_af_arp __read_mostly = {
- .family = NFPROTO_ARP,
- .nhooks = NF_ARP_NUMHOOKS,
- .owner = THIS_MODULE,
- .nops = 1,
- .hooks = {
- [NF_ARP_IN] = nft_do_chain_arp,
- [NF_ARP_OUT] = nft_do_chain_arp,
- [NF_ARP_FORWARD] = nft_do_chain_arp,
- },
-};
-
-static int nf_tables_arp_init_net(struct net *net)
-{
- net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.arp== NULL)
- return -ENOMEM;
-
- memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp));
-
- if (nft_register_afinfo(net, net->nft.arp) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.arp);
- return -ENOMEM;
-}
-
-static void nf_tables_arp_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.arp);
- kfree(net->nft.arp);
-}
-
-static struct pernet_operations nf_tables_arp_net_ops = {
- .init = nf_tables_arp_init_net,
- .exit = nf_tables_arp_exit_net,
-};
-
static const struct nf_chain_type filter_arp = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -73,26 +34,19 @@ static const struct nf_chain_type filter_arp = {
.owner = THIS_MODULE,
.hook_mask = (1 << NF_ARP_IN) |
(1 << NF_ARP_OUT),
+ .hooks = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+ },
};
static int __init nf_tables_arp_init(void)
{
- int ret;
-
- ret = nft_register_chain_type(&filter_arp);
- if (ret < 0)
- return ret;
-
- ret = register_pernet_subsys(&nf_tables_arp_net_ops);
- if (ret < 0)
- nft_unregister_chain_type(&filter_arp);
-
- return ret;
+ return nft_register_chain_type(&filter_arp);
}
static void __exit nf_tables_arp_exit(void)
{
- unregister_pernet_subsys(&nf_tables_arp_net_ops);
nft_unregister_chain_type(&filter_arp);
}
@@ -101,4 +55,4 @@ module_exit(nf_tables_arp_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */
+MODULE_ALIAS_NFT_CHAIN(3, "filter"); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index 2840a29b2e04..96f955496d5f 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -24,69 +24,12 @@ static unsigned int nft_do_chain_ipv4(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_ipv4_output(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- if (unlikely(skb->len < sizeof(struct iphdr) ||
- ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
- if (net_ratelimit())
- pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
- "packet\n");
- return NF_ACCEPT;
- }
-
- return nft_do_chain_ipv4(priv, skb, state);
-}
-
-struct nft_af_info nft_af_ipv4 __read_mostly = {
- .family = NFPROTO_IPV4,
- .nhooks = NF_INET_NUMHOOKS,
- .owner = THIS_MODULE,
- .nops = 1,
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
- [NF_INET_LOCAL_OUT] = nft_ipv4_output,
- [NF_INET_FORWARD] = nft_do_chain_ipv4,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
- },
-};
-EXPORT_SYMBOL_GPL(nft_af_ipv4);
-
-static int nf_tables_ipv4_init_net(struct net *net)
-{
- net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.ipv4 == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4));
-
- if (nft_register_afinfo(net, net->nft.ipv4) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.ipv4);
- return -ENOMEM;
-}
-
-static void nf_tables_ipv4_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.ipv4);
- kfree(net->nft.ipv4);
-}
-
-static struct pernet_operations nf_tables_ipv4_net_ops = {
- .init = nf_tables_ipv4_init_net,
- .exit = nf_tables_ipv4_exit_net,
-};
-
static const struct nf_chain_type filter_ipv4 = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -97,26 +40,22 @@ static const struct nf_chain_type filter_ipv4 = {
(1 << NF_INET_FORWARD) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
};
static int __init nf_tables_ipv4_init(void)
{
- int ret;
-
- ret = nft_register_chain_type(&filter_ipv4);
- if (ret < 0)
- return ret;
-
- ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
- if (ret < 0)
- nft_unregister_chain_type(&filter_ipv4);
-
- return ret;
+ return nft_register_chain_type(&filter_ipv4);
}
static void __exit nf_tables_ipv4_exit(void)
{
- unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
nft_unregister_chain_type(&filter_ipv4);
}
@@ -125,4 +64,4 @@ module_exit(nf_tables_ipv4_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(AF_INET);
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "filter");
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index f5c66a7a4bf2..f2a490981594 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -33,7 +33,8 @@ static unsigned int nft_nat_do_chain(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index 30493beb611a..d965c225b9f6 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -33,12 +33,8 @@ static unsigned int nf_route_table_hook(void *priv,
const struct iphdr *iph;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- nft_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
mark = skb->mark;
iph = ip_hdr(skb);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 9f37c4727861..dc5edc8f7564 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -83,7 +83,6 @@ static int sockstat_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations sockstat_seq_fops = {
- .owner = THIS_MODULE,
.open = sockstat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -467,7 +466,6 @@ static int snmp_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations snmp_seq_fops = {
- .owner = THIS_MODULE,
.open = snmp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -515,7 +513,6 @@ static int netstat_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations netstat_seq_fops = {
- .owner = THIS_MODULE,
.open = netstat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 33b70bfd1122..7c509697ebc7 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -513,11 +513,18 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int err;
struct ip_options_data opt_copy;
struct raw_frag_vec rfv;
+ int hdrincl;
err = -EMSGSIZE;
if (len > 0xFFFF)
goto out;
+ /* hdrincl should be READ_ONCE(inet->hdrincl)
+ * but READ_ONCE() doesn't work with bit fields.
+ * Doing this indirectly yields the same result.
+ */
+ hdrincl = inet->hdrincl;
+ hdrincl = READ_ONCE(hdrincl);
/*
* Check the flags.
*/
@@ -593,7 +600,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* Linux does not mangle headers on raw sockets,
* so that IP options + IP_HDRINCL is non-sense.
*/
- if (inet->hdrincl)
+ if (hdrincl)
goto done;
if (ipc.opt->opt.srr) {
if (!daddr)
@@ -610,17 +617,30 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc.oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
- } else if (!ipc.oif)
+ } else if (!ipc.oif) {
ipc.oif = inet->uc_index;
+ } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
+ /* oif is set, packet is to local broadcast and
+ * and uc_index is set. oif is most likely set
+ * by sk_bound_dev_if. If uc_index != oif check if the
+ * oif is an L3 master and uc_index is an L3 slave.
+ * If so, we want to allow the send using the uc_index.
+ */
+ if (ipc.oif != inet->uc_index &&
+ ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
+ inet->uc_index)) {
+ ipc.oif = inet->uc_index;
+ }
+ }
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
- (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
+ (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);
- if (!inet->hdrincl) {
+ if (!hdrincl) {
rfv.msg = msg;
rfv.hlen = 0;
@@ -645,7 +665,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto do_confirm;
back_from_confirm:
- if (inet->hdrincl)
+ if (hdrincl)
err = raw_send_hdrinc(sk, &fl4, msg, len,
&rt, msg->msg_flags, &ipc.sockc);
@@ -1112,7 +1132,6 @@ static int raw_v4_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations raw_seq_fops = {
- .owner = THIS_MODULE,
.open = raw_v4_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 43b69af242e1..49cc1c1df1ba 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -240,7 +240,6 @@ static int rt_cache_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations rt_cache_seq_fops = {
- .owner = THIS_MODULE,
.open = rt_cache_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -331,7 +330,6 @@ static int rt_cpu_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations rt_cpu_seq_fops = {
- .owner = THIS_MODULE,
.open = rt_cpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -369,7 +367,6 @@ static int rt_acct_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations rt_acct_proc_fops = {
- .owner = THIS_MODULE,
.open = rt_acct_proc_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1106,7 +1103,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
new = true;
}
- __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
+ __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);
if (!dst_check(&rt->dst, 0)) {
if (new)
@@ -2762,6 +2759,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (err == 0 && rt->dst.error)
err = -rt->dst.error;
} else {
+ fl4.flowi4_iif = LOOPBACK_IFINDEX;
rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
err = 0;
if (IS_ERR(rt))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bf97317e6c97..c059aa7df0a9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -283,8 +283,6 @@
#include <asm/ioctls.h>
#include <net/busy_poll.h>
-#include <trace/events/tcp.h>
-
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -465,7 +463,7 @@ void tcp_init_transfer(struct sock *sk, int bpf_op)
tcp_mtup_init(sk);
icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_metrics(sk);
- tcp_call_bpf(sk, bpf_op);
+ tcp_call_bpf(sk, bpf_op, 0, NULL);
tcp_init_congestion_control(sk);
tcp_init_buffer_space(sk);
}
@@ -493,18 +491,16 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
* take care of normal races (between the test and the event) and we don't
* go look at any of the socket buffers directly.
*/
-unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
- unsigned int mask;
+ __poll_t mask;
struct sock *sk = sock->sk;
const struct tcp_sock *tp = tcp_sk(sk);
int state;
- sock_rps_record_flow(sk);
-
sock_poll_wait(file, sk_sleep(sk), wait);
- state = sk_state_load(sk);
+ state = inet_sk_state_load(sk);
if (state == TCP_LISTEN)
return inet_csk_listen_poll(sk);
@@ -1106,12 +1102,15 @@ static int linear_payload_sz(bool first_skb)
return 0;
}
-static int select_size(const struct sock *sk, bool sg, bool first_skb)
+static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
{
const struct tcp_sock *tp = tcp_sk(sk);
int tmp = tp->mss_cache;
if (sg) {
+ if (zc)
+ return 0;
+
if (sk_can_gso(sk)) {
tmp = linear_payload_sz(first_skb);
} else {
@@ -1188,7 +1187,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
bool process_backlog = false;
- bool sg;
+ bool sg, zc = false;
long timeo;
flags = msg->msg_flags;
@@ -1206,7 +1205,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
goto out_err;
}
- if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG))
+ zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+ if (!zc)
uarg->zerocopy = 0;
}
@@ -1283,6 +1283,7 @@ restart:
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
+ int linear;
new_segment:
/* Allocate new segment. If the interface is SG,
@@ -1296,9 +1297,8 @@ new_segment:
goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
- skb = sk_stream_alloc_skb(sk,
- select_size(sk, sg, first_skb),
- sk->sk_allocation,
+ linear = select_size(sk, sg, first_skb, zc);
+ skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
first_skb);
if (!skb)
goto wait_for_memory;
@@ -1327,13 +1327,13 @@ new_segment:
copy = msg_data_left(msg);
/* Where to copy to? */
- if (skb_availroom(skb) > 0) {
+ if (skb_availroom(skb) > 0 && !zc) {
/* We have some space in skb head. Superb! */
copy = min_t(int, copy, skb_availroom(skb));
err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
if (err)
goto do_fault;
- } else if (!uarg || !uarg->zerocopy) {
+ } else if (!zc) {
bool merge = true;
int i = skb_shinfo(skb)->nr_frags;
struct page_frag *pfrag = sk_page_frag(sk);
@@ -1373,8 +1373,10 @@ new_segment:
pfrag->offset += copy;
} else {
err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
- if (err == -EMSGSIZE || err == -EEXIST)
+ if (err == -EMSGSIZE || err == -EEXIST) {
+ tcp_mark_push(tp, skb);
goto new_segment;
+ }
if (err < 0)
goto do_error;
copy = err;
@@ -1731,8 +1733,8 @@ static void tcp_update_recv_tstamps(struct sk_buff *skb,
}
/* Similar to __sock_recv_timestamp, but does not require an skb */
-void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
- struct scm_timestamping *tss)
+static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+ struct scm_timestamping *tss)
{
struct timeval tv;
bool has_timestamping = false;
@@ -2040,7 +2042,29 @@ void tcp_set_state(struct sock *sk, int state)
{
int oldstate = sk->sk_state;
- trace_tcp_set_state(sk, oldstate, state);
+ /* We defined a new enum for TCP states that are exported in BPF
+ * so as not force the internal TCP states to be frozen. The
+ * following checks will detect if an internal state value ever
+ * differs from the BPF value. If this ever happens, then we will
+ * need to remap the internal value to the BPF value before calling
+ * tcp_call_bpf_2arg.
+ */
+ BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED);
+ BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT);
+ BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV);
+ BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1);
+ BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2);
+ BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT);
+ BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE);
+ BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT);
+ BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK);
+ BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
+ BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
+ BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
+ BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
+
+ if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
+ tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
switch (state) {
case TCP_ESTABLISHED:
@@ -2065,7 +2089,7 @@ void tcp_set_state(struct sock *sk, int state)
/* Change state AFTER socket is unhashed to avoid closed
* socket sitting in hash tables.
*/
- sk_state_store(sk, state);
+ inet_sk_state_store(sk, state);
#ifdef STATE_TRACE
SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
@@ -2298,6 +2322,9 @@ adjudge_to_death:
tcp_send_active_reset(sk, GFP_ATOMIC);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
+ } else if (!check_net(sock_net(sk))) {
+ /* Not possible to send reset; just close */
+ tcp_set_state(sk, TCP_CLOSE);
}
}
@@ -2412,6 +2439,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
@@ -2430,6 +2458,12 @@ int tcp_disconnect(struct sock *sk, int flags)
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
+ if (sk->sk_frag.page) {
+ put_page(sk->sk_frag.page);
+ sk->sk_frag.page = NULL;
+ sk->sk_frag.offset = 0;
+ }
+
sk->sk_error_report(sk);
return err;
}
@@ -2919,7 +2953,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
if (sk->sk_type != SOCK_STREAM)
return;
- info->tcpi_state = sk_state_load(sk);
+ info->tcpi_state = inet_sk_state_load(sk);
/* Report meaningful fields for all TCP states, including listeners */
rate = READ_ONCE(sk->sk_pacing_rate);
@@ -3577,6 +3611,9 @@ void __init tcp_init(void)
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
inet_hashinfo_init(&tcp_hashinfo);
+ inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
+ thash_entries, 21, /* one slot per 2 MB*/
+ 0, 64 * 1024);
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 69ee877574d0..a471f696e13c 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -110,7 +110,8 @@ struct bbr {
u32 lt_last_lost; /* LT intvl start: tp->lost */
u32 pacing_gain:10, /* current gain for setting pacing rate */
cwnd_gain:10, /* current gain for setting cwnd */
- full_bw_cnt:3, /* number of rounds without large bw gains */
+ full_bw_reached:1, /* reached full bw in Startup? */
+ full_bw_cnt:2, /* number of rounds without large bw gains */
cycle_idx:3, /* current index in pacing_gain cycle array */
has_seen_rtt:1, /* have we seen an RTT sample yet? */
unused_b:5;
@@ -180,7 +181,7 @@ static bool bbr_full_bw_reached(const struct sock *sk)
{
const struct bbr *bbr = inet_csk_ca(sk);
- return bbr->full_bw_cnt >= bbr_full_bw_cnt;
+ return bbr->full_bw_reached;
}
/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
@@ -480,7 +481,8 @@ static void bbr_advance_cycle_phase(struct sock *sk)
bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
bbr->cycle_mstamp = tp->delivered_mstamp;
- bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx];
+ bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT :
+ bbr_pacing_gain[bbr->cycle_idx];
}
/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
@@ -489,8 +491,7 @@ static void bbr_update_cycle_phase(struct sock *sk,
{
struct bbr *bbr = inet_csk_ca(sk);
- if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw &&
- bbr_is_next_cycle_phase(sk, rs))
+ if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs))
bbr_advance_cycle_phase(sk);
}
@@ -717,6 +718,7 @@ static void bbr_check_full_bw_reached(struct sock *sk,
return;
}
++bbr->full_bw_cnt;
+ bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;
}
/* If pipe is probably full, drain the queue and then enter steady-state. */
@@ -764,7 +766,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
filter_expired = after(tcp_jiffies32,
bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
if (rs->rtt_us >= 0 &&
- (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) {
+ (rs->rtt_us <= bbr->min_rtt_us ||
+ (filter_expired && !rs->is_ack_delayed))) {
bbr->min_rtt_us = rs->rtt_us;
bbr->min_rtt_stamp = tcp_jiffies32;
}
@@ -850,6 +853,7 @@ static void bbr_init(struct sock *sk)
bbr->restore_cwnd = 0;
bbr->round_start = 0;
bbr->idle_restart = 0;
+ bbr->full_bw_reached = 0;
bbr->full_bw = 0;
bbr->full_bw_cnt = 0;
bbr->cycle_mstamp = 0;
@@ -871,6 +875,11 @@ static u32 bbr_sndbuf_expand(struct sock *sk)
*/
static u32 bbr_undo_cwnd(struct sock *sk)
{
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */
+ bbr->full_bw_cnt = 0;
+ bbr_reset_lt_bw_sampling(sk);
return tcp_sk(sk)->snd_cwnd;
}
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index abbf0edcf6c2..81148f7a2323 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -24,7 +24,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
{
struct tcp_info *info = _info;
- if (sk_state_load(sk) == TCP_LISTEN) {
+ if (inet_sk_state_load(sk) == TCP_LISTEN) {
r->idiag_rqueue = sk->sk_ack_backlog;
r->idiag_wqueue = sk->sk_max_ack_backlog;
} else if (sk->sk_type == SOCK_STREAM) {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 78c192ee03a4..018a48477355 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -379,18 +379,9 @@ fastopen:
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie)
{
- unsigned long last_syn_loss = 0;
const struct dst_entry *dst;
- int syn_loss = 0;
- tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss);
-
- /* Recurring FO SYN losses: no cookie or data in SYN */
- if (syn_loss > 1 &&
- time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
- cookie->len = -1;
- return false;
- }
+ tcp_fastopen_cache_get(sk, mss, cookie);
/* Firewall blackhole issue check */
if (tcp_fastopen_active_should_disable(sk)) {
@@ -448,6 +439,8 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect);
* following circumstances:
* 1. client side TFO socket receives out of order FIN
* 2. client side TFO socket receives out of order RST
+ * 3. client side TFO socket has timed out three times consecutively during
+ * or after handshake
* We disable active side TFO globally for 1hr at first. Then if it
* happens again, we disable it for 2h, then 4h, 8h, ...
* And we reset the timeout back to 1hr when we see a successful active
@@ -524,3 +517,20 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
dst_release(dst);
}
}
+
+void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired)
+{
+ u32 timeouts = inet_csk(sk)->icsk_retransmits;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ /* Broken middle-boxes may black-hole Fast Open connection during or
+ * even after the handshake. Be extremely conservative and pause
+ * Fast Open globally after hitting the third consecutive timeout or
+ * exceeding the configured timeout limit.
+ */
+ if ((tp->syn_fastopen || tp->syn_data || tp->syn_data_acked) &&
+ (timeouts == 2 || (timeouts < 2 && expired))) {
+ tcp_fastopen_active_disable(sk);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+ }
+}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 734cfc8ff76e..cfa51cfd2d99 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -97,6 +97,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
+#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -508,9 +509,6 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
u32 new_sample = tp->rcv_rtt_est.rtt_us;
long m = sample;
- if (m == 0)
- m = 1;
-
if (new_sample != 0) {
/* If we sample in larger samples in the non-timestamp
* case, we could grossly overestimate the RTT especially
@@ -547,6 +545,8 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
return;
delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
+ if (!delta_us)
+ delta_us = 1;
tcp_rcv_rtt_update(tp, delta_us, 1);
new_measure:
@@ -563,8 +563,11 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
(TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
- u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ u32 delta_us;
+ if (!delta)
+ delta = 1;
+ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
tcp_rcv_rtt_update(tp, delta_us, 0);
}
}
@@ -576,9 +579,10 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
void tcp_rcv_space_adjust(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ u32 copied;
int time;
- int copied;
+ tcp_mstamp_refresh(tp);
time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
return;
@@ -599,38 +603,31 @@ void tcp_rcv_space_adjust(struct sock *sk)
if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvwin, rcvmem, rcvbuf;
+ int rcvmem, rcvbuf;
+ u64 rcvwin, grow;
/* minimal window to cope with packet losses, assuming
* steady state. Add some cushion because of small variations.
*/
- rcvwin = (copied << 1) + 16 * tp->advmss;
+ rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
- /* If rate increased by 25%,
- * assume slow start, rcvwin = 3 * copied
- * If rate increased by 50%,
- * assume sender can use 2x growth, rcvwin = 4 * copied
- */
- if (copied >=
- tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
- if (copied >=
- tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
- rcvwin <<= 1;
- else
- rcvwin += (rcvwin >> 1);
- }
+ /* Accommodate for sender rate increase (eg. slow start) */
+ grow = rcvwin * (copied - tp->rcvq_space.space);
+ do_div(grow, tp->rcvq_space.space);
+ rcvwin += (grow << 1);
rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
rcvmem += 128;
- rcvbuf = min(rcvwin / tp->advmss * rcvmem,
- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ do_div(rcvwin, tp->advmss);
+ rcvbuf = min_t(u64, rcvwin * rcvmem,
+ sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (rcvbuf > sk->sk_rcvbuf) {
sk->sk_rcvbuf = rcvbuf;
/* Make the window clamp follow along. */
- tp->window_clamp = rcvwin;
+ tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
}
}
tp->rcvq_space.space = copied;
@@ -1941,6 +1938,8 @@ void tcp_enter_loss(struct sock *sk)
if (is_reneg) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
+ /* Mark SACK reneging until we recover from this loss event. */
+ tp->is_sack_reneg = 1;
}
tcp_clear_all_retrans_hints(tp);
@@ -2326,6 +2325,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
}
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->undo_marker = 0;
+ tp->rack.advanced = 1; /* Force RACK to re-exam losses */
}
static inline bool tcp_may_undo(const struct tcp_sock *tp)
@@ -2364,6 +2364,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
return true;
}
tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
return false;
}
@@ -2397,8 +2398,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0;
- if (frto_undo || tcp_is_sack(tp))
+ if (frto_undo || tcp_is_sack(tp)) {
tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
+ }
return true;
}
return false;
@@ -2855,11 +2858,18 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
*rexmit = REXMIT_LOST;
}
-static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
+static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
{
u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
struct tcp_sock *tp = tcp_sk(sk);
+ if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
+ /* If the remote keeps returning delayed ACKs, eventually
+ * the min filter would pick it up and overestimate the
+ * prop. delay when it expires. Skip suspected delayed ACKs.
+ */
+ return;
+ }
minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
rtt_us ? : jiffies_to_usecs(1));
}
@@ -2899,7 +2909,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
* always taken together with ACK, SACK, or TS-opts. Any negative
* values will be skipped with the seq_rtt_us < 0 check above.
*/
- tcp_update_rtt_min(sk, ca_rtt_us);
+ tcp_update_rtt_min(sk, ca_rtt_us, flag);
tcp_rtt_estimator(sk, seq_rtt_us);
tcp_set_rto(sk);
@@ -3123,6 +3133,17 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
+
+ if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
+ last_in_flight && !prior_sacked && fully_acked &&
+ sack->rate->prior_delivered + 1 == tp->delivered &&
+ !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
+ /* Conservatively mark a delayed ACK. It's typically
+ * from a lone runt packet over the round trip to
+ * a receiver w/o out-of-order or CE events.
+ */
+ flag |= FLAG_ACK_MAYBE_DELAYED;
+ }
}
if (sack->first_sackt) {
sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
@@ -3495,6 +3516,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
struct tcp_sacktag_state sack_state;
struct rate_sample rs = { .prior_delivered = 0 };
u32 prior_snd_una = tp->snd_una;
+ bool is_sack_reneg = tp->is_sack_reneg;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
@@ -3611,7 +3633,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */
- tcp_rate_gen(sk, delivered, lost, sack_state.rate);
+ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
+ tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
tcp_xmit_recovery(sk, rexmit);
return 1;
@@ -5296,6 +5319,9 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
unsigned int len = skb->len;
struct tcp_sock *tp = tcp_sk(sk);
+ /* TCP congestion window tracking */
+ trace_tcp_probe(sk, skb);
+
tcp_mstamp_refresh(tp);
if (unlikely(!sk->sk_rx_dst))
inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c6bc0c4d19c6..95738aa0d8a6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -848,7 +848,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent,
0,
- tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+ tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
@@ -1591,6 +1591,34 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_filter);
+static void tcp_v4_restore_cb(struct sk_buff *skb)
+{
+ memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
+ sizeof(struct inet_skb_parm));
+}
+
+static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
+ const struct tcphdr *th)
+{
+ /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+ * barrier() makes sure compiler wont play fool^Waliasing games.
+ */
+ memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
+ sizeof(struct inet_skb_parm));
+ barrier();
+
+ TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+ TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+ skb->len - th->doff * 4);
+ TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+ TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+ TCP_SKB_CB(skb)->tcp_tw_isn = 0;
+ TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
+ TCP_SKB_CB(skb)->sacked = 0;
+ TCP_SKB_CB(skb)->has_rxtstamp =
+ skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
+}
+
/*
* From tcp_input.c
*/
@@ -1631,24 +1659,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
- /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
- * barrier() makes sure compiler wont play fool^Waliasing games.
- */
- memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
- sizeof(struct inet_skb_parm));
- barrier();
-
- TCP_SKB_CB(skb)->seq = ntohl(th->seq);
- TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
- skb->len - th->doff * 4);
- TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
- TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
- TCP_SKB_CB(skb)->tcp_tw_isn = 0;
- TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
- TCP_SKB_CB(skb)->sacked = 0;
- TCP_SKB_CB(skb)->has_rxtstamp =
- skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
-
lookup:
sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
th->dest, sdif, &refcounted);
@@ -1679,14 +1689,19 @@ process:
sock_hold(sk);
refcounted = true;
nsk = NULL;
- if (!tcp_filter(sk, skb))
+ if (!tcp_filter(sk, skb)) {
+ th = (const struct tcphdr *)skb->data;
+ iph = ip_hdr(skb);
+ tcp_v4_fill_cb(skb, iph, th);
nsk = tcp_check_req(sk, skb, req, false);
+ }
if (!nsk) {
reqsk_put(req);
goto discard_and_relse;
}
if (nsk == sk) {
reqsk_put(req);
+ tcp_v4_restore_cb(skb);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v4_send_reset(nsk, skb);
goto discard_and_relse;
@@ -1712,6 +1727,7 @@ process:
goto discard_and_relse;
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
+ tcp_v4_fill_cb(skb, iph, th);
skb->dev = NULL;
@@ -1742,6 +1758,8 @@ no_tcp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
+ tcp_v4_fill_cb(skb, iph, th);
+
if (tcp_checksum_complete(skb)) {
csum_error:
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
@@ -1768,6 +1786,8 @@ do_time_wait:
goto discard_it;
}
+ tcp_v4_fill_cb(skb, iph, th);
+
if (tcp_checksum_complete(skb)) {
inet_twsk_put(inet_twsk(sk));
goto csum_error;
@@ -1784,6 +1804,7 @@ do_time_wait:
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
+ tcp_v4_restore_cb(skb);
refcounted = false;
goto process;
}
@@ -1890,7 +1911,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
/* Clean up the MD5 key list, if any */
if (tp->md5sig_info) {
tcp_clear_md5_list(sk);
- kfree_rcu(tp->md5sig_info, rcu);
+ kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
tp->md5sig_info = NULL;
}
#endif
@@ -2260,7 +2281,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
timer_expires = jiffies;
}
- state = sk_state_load(sk);
+ state = inet_sk_state_load(sk);
if (state == TCP_LISTEN)
rx_queue = sk->sk_ack_backlog;
else
@@ -2337,7 +2358,6 @@ out:
}
static const struct file_operations tcp_afinfo_seq_fops = {
- .owner = THIS_MODULE,
.open = tcp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 7097f92d16e5..03b51cdcc731 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -546,8 +546,7 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
static DEFINE_SEQLOCK(fastopen_seqlock);
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
- struct tcp_fastopen_cookie *cookie,
- int *syn_loss, unsigned long *last_syn_loss)
+ struct tcp_fastopen_cookie *cookie)
{
struct tcp_metrics_block *tm;
@@ -564,8 +563,6 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
*cookie = tfom->cookie;
if (cookie->len <= 0 && tfom->try_exp == 1)
cookie->exp = true;
- *syn_loss = tfom->syn_loss;
- *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0;
} while (read_seqretry(&fastopen_seqlock, seq));
}
rcu_read_unlock();
@@ -895,7 +892,7 @@ static void tcp_metrics_flush_all(struct net *net)
pp = &hb->chain;
for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
match = net ? net_eq(tm_net(tm), net) :
- !atomic_read(&tm_net(tm)->count);
+ !refcount_read(&tm_net(tm)->count);
if (match) {
*pp = tm->tcpm_next;
kfree_rcu(tm, rcu_head);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e36eff0403f4..a8384b0c11f8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -310,10 +310,17 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (state == TCP_TIME_WAIT)
timeo = TCP_TIMEWAIT_LEN;
+ /* tw_timer is pinned, so we need to make sure BH are disabled
+ * in following section, otherwise timer handler could run before
+ * we complete the initialization.
+ */
+ local_bh_disable();
inet_twsk_schedule(tw, timeo);
- /* Linkage updates. */
- __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
- inet_twsk_put(tw);
+ /* Linkage updates.
+ * Note that access to tw after this point is illegal.
+ */
+ inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
+ local_bh_enable();
} else {
/* Sorry, if we're out of memory, just CLOSE this
* socket up. We've got bigger problems than
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 0b5a05bd82e3..764298e52577 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -146,7 +146,7 @@ static void tcpnv_init(struct sock *sk)
* within a datacenter, where we have reasonable estimates of
* RTTs
*/
- base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT);
+ base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT, 0, NULL);
if (base_rtt > 0) {
ca->nv_base_rtt = base_rtt;
ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
@@ -364,7 +364,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
*/
cwnd_by_slope = (u32)
div64_u64(((u64)ca->nv_rtt_max_rate) * ca->nv_min_rtt,
- (u64)(80000 * tp->mss_cache));
+ 80000ULL * tp->mss_cache);
max_win = cwnd_by_slope + nv_pad;
/* If cwnd > max_win, decrease cwnd
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index b6a2aa1dcf56..4d58e2ce0b5b 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4))
+ return ERR_PTR(-EINVAL);
+
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a4d214c7b506..e9f985e42405 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1944,7 +1944,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
in_flight = tcp_packets_in_flight(tp);
- BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
+ BUG_ON(tcp_skb_pcount(skb) <= 1);
+ BUG_ON(tp->snd_cwnd <= in_flight);
send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
@@ -2414,15 +2415,12 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
/* Schedule a loss probe in 2*RTT for SACK capable connections
- * in Open state, that are either limited by cwnd or application.
+ * not in loss recovery, that are either limited by cwnd or application.
*/
if ((early_retrans != 3 && early_retrans != 4) ||
!tp->packets_out || !tcp_is_sack(tp) ||
- icsk->icsk_ca_state != TCP_CA_Open)
- return false;
-
- if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
- !tcp_write_queue_empty(sk))
+ (icsk->icsk_ca_state != TCP_CA_Open &&
+ icsk->icsk_ca_state != TCP_CA_CWR))
return false;
/* Probe timeout is 2*rtt. Add minimum RTO to account
@@ -2907,6 +2905,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
+ if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
+ tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
+ TCP_SKB_CB(skb)->seq, segs, err);
+
if (likely(!err)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
trace_tcp_retransmit_skb(sk, skb);
@@ -3471,7 +3473,7 @@ int tcp_connect(struct sock *sk)
struct sk_buff *buff;
int err;
- tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
+ tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
return -EHOSTUNREACH; /* Routing failure or similar. */
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
deleted file mode 100644
index 697f4c67b2e3..000000000000
--- a/net/ipv4/tcp_probe.c
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * tcpprobe - Observe the TCP flow with kprobes.
- *
- * The idea for this came from Werner Almesberger's umlsim
- * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/kprobes.h>
-#include <linux/socket.h>
-#include <linux/tcp.h>
-#include <linux/slab.h>
-#include <linux/proc_fs.h>
-#include <linux/module.h>
-#include <linux/ktime.h>
-#include <linux/time.h>
-#include <net/net_namespace.h>
-
-#include <net/tcp.h>
-
-MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>");
-MODULE_DESCRIPTION("TCP cwnd snooper");
-MODULE_LICENSE("GPL");
-MODULE_VERSION("1.1");
-
-static int port __read_mostly;
-MODULE_PARM_DESC(port, "Port to match (0=all)");
-module_param(port, int, 0);
-
-static unsigned int bufsize __read_mostly = 4096;
-MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");
-module_param(bufsize, uint, 0);
-
-static unsigned int fwmark __read_mostly;
-MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
-module_param(fwmark, uint, 0);
-
-static int full __read_mostly;
-MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
-module_param(full, int, 0);
-
-static const char procname[] = "tcpprobe";
-
-struct tcp_log {
- ktime_t tstamp;
- union {
- struct sockaddr raw;
- struct sockaddr_in v4;
- struct sockaddr_in6 v6;
- } src, dst;
- u16 length;
- u32 snd_nxt;
- u32 snd_una;
- u32 snd_wnd;
- u32 rcv_wnd;
- u32 snd_cwnd;
- u32 ssthresh;
- u32 srtt;
-};
-
-static struct {
- spinlock_t lock;
- wait_queue_head_t wait;
- ktime_t start;
- u32 lastcwnd;
-
- unsigned long head, tail;
- struct tcp_log *log;
-} tcp_probe;
-
-static inline int tcp_probe_used(void)
-{
- return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1);
-}
-
-static inline int tcp_probe_avail(void)
-{
- return bufsize - tcp_probe_used() - 1;
-}
-
-#define tcp_probe_copy_fl_to_si4(inet, si4, mem) \
- do { \
- si4.sin_family = AF_INET; \
- si4.sin_port = inet->inet_##mem##port; \
- si4.sin_addr.s_addr = inet->inet_##mem##addr; \
- } while (0) \
-
-/*
- * Hook inserted to be called before each receive packet.
- * Note: arguments must match tcp_rcv_established()!
- */
-static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct tcphdr *th)
-{
- unsigned int len = skb->len;
- const struct tcp_sock *tp = tcp_sk(sk);
- const struct inet_sock *inet = inet_sk(sk);
-
- /* Only update if port or skb mark matches */
- if (((port == 0 && fwmark == 0) ||
- ntohs(inet->inet_dport) == port ||
- ntohs(inet->inet_sport) == port ||
- (fwmark > 0 && skb->mark == fwmark)) &&
- (full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
-
- spin_lock(&tcp_probe.lock);
- /* If log fills, just silently drop */
- if (tcp_probe_avail() > 1) {
- struct tcp_log *p = tcp_probe.log + tcp_probe.head;
-
- p->tstamp = ktime_get();
- switch (sk->sk_family) {
- case AF_INET:
- tcp_probe_copy_fl_to_si4(inet, p->src.v4, s);
- tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d);
- break;
- case AF_INET6:
- memset(&p->src.v6, 0, sizeof(p->src.v6));
- memset(&p->dst.v6, 0, sizeof(p->dst.v6));
-#if IS_ENABLED(CONFIG_IPV6)
- p->src.v6.sin6_family = AF_INET6;
- p->src.v6.sin6_port = inet->inet_sport;
- p->src.v6.sin6_addr = inet6_sk(sk)->saddr;
-
- p->dst.v6.sin6_family = AF_INET6;
- p->dst.v6.sin6_port = inet->inet_dport;
- p->dst.v6.sin6_addr = sk->sk_v6_daddr;
-#endif
- break;
- default:
- BUG();
- }
-
- p->length = len;
- p->snd_nxt = tp->snd_nxt;
- p->snd_una = tp->snd_una;
- p->snd_cwnd = tp->snd_cwnd;
- p->snd_wnd = tp->snd_wnd;
- p->rcv_wnd = tp->rcv_wnd;
- p->ssthresh = tcp_current_ssthresh(sk);
- p->srtt = tp->srtt_us >> 3;
-
- tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1);
- }
- tcp_probe.lastcwnd = tp->snd_cwnd;
- spin_unlock(&tcp_probe.lock);
-
- wake_up(&tcp_probe.wait);
- }
-
- jprobe_return();
-}
-
-static struct jprobe tcp_jprobe = {
- .kp = {
- .symbol_name = "tcp_rcv_established",
- },
- .entry = jtcp_rcv_established,
-};
-
-static int tcpprobe_open(struct inode *inode, struct file *file)
-{
- /* Reset (empty) log */
- spin_lock_bh(&tcp_probe.lock);
- tcp_probe.head = tcp_probe.tail = 0;
- tcp_probe.start = ktime_get();
- spin_unlock_bh(&tcp_probe.lock);
-
- return 0;
-}
-
-static int tcpprobe_sprint(char *tbuf, int n)
-{
- const struct tcp_log *p
- = tcp_probe.log + tcp_probe.tail;
- struct timespec64 ts
- = ktime_to_timespec64(ktime_sub(p->tstamp, tcp_probe.start));
-
- return scnprintf(tbuf, n,
- "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n",
- (unsigned long)ts.tv_sec,
- (unsigned long)ts.tv_nsec,
- &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una,
- p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd);
-}
-
-static ssize_t tcpprobe_read(struct file *file, char __user *buf,
- size_t len, loff_t *ppos)
-{
- int error = 0;
- size_t cnt = 0;
-
- if (!buf)
- return -EINVAL;
-
- while (cnt < len) {
- char tbuf[256];
- int width;
-
- /* Wait for data in buffer */
- error = wait_event_interruptible(tcp_probe.wait,
- tcp_probe_used() > 0);
- if (error)
- break;
-
- spin_lock_bh(&tcp_probe.lock);
- if (tcp_probe.head == tcp_probe.tail) {
- /* multiple readers race? */
- spin_unlock_bh(&tcp_probe.lock);
- continue;
- }
-
- width = tcpprobe_sprint(tbuf, sizeof(tbuf));
-
- if (cnt + width < len)
- tcp_probe.tail = (tcp_probe.tail + 1) & (bufsize - 1);
-
- spin_unlock_bh(&tcp_probe.lock);
-
- /* if record greater than space available
- return partial buffer (so far) */
- if (cnt + width >= len)
- break;
-
- if (copy_to_user(buf + cnt, tbuf, width))
- return -EFAULT;
- cnt += width;
- }
-
- return cnt == 0 ? error : cnt;
-}
-
-static const struct file_operations tcpprobe_fops = {
- .owner = THIS_MODULE,
- .open = tcpprobe_open,
- .read = tcpprobe_read,
- .llseek = noop_llseek,
-};
-
-static __init int tcpprobe_init(void)
-{
- int ret = -ENOMEM;
-
- /* Warning: if the function signature of tcp_rcv_established,
- * has been changed, you also have to change the signature of
- * jtcp_rcv_established, otherwise you end up right here!
- */
- BUILD_BUG_ON(__same_type(tcp_rcv_established,
- jtcp_rcv_established) == 0);
-
- init_waitqueue_head(&tcp_probe.wait);
- spin_lock_init(&tcp_probe.lock);
-
- if (bufsize == 0)
- return -EINVAL;
-
- bufsize = roundup_pow_of_two(bufsize);
- tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL);
- if (!tcp_probe.log)
- goto err0;
-
- if (!proc_create(procname, S_IRUSR, init_net.proc_net, &tcpprobe_fops))
- goto err0;
-
- ret = register_jprobe(&tcp_jprobe);
- if (ret)
- goto err1;
-
- pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n",
- port, fwmark, bufsize);
- return 0;
- err1:
- remove_proc_entry(procname, init_net.proc_net);
- err0:
- kfree(tcp_probe.log);
- return ret;
-}
-module_init(tcpprobe_init);
-
-static __exit void tcpprobe_exit(void)
-{
- remove_proc_entry(procname, init_net.proc_net);
- unregister_jprobe(&tcp_jprobe);
- kfree(tcp_probe.log);
-}
-module_exit(tcpprobe_exit);
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 3330a370d306..c61240e43923 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
/* Update the connection delivery information and generate a rate sample. */
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- struct rate_sample *rs)
+ bool is_sack_reneg, struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 snd_us, ack_us;
@@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
rs->acked_sacked = delivered; /* freshly ACKed or SACKed */
rs->losses = lost; /* freshly marked lost */
- /* Return an invalid sample if no timing information is available. */
- if (!rs->prior_mstamp) {
+ /* Return an invalid sample if no timing information is available or
+ * in recovery from loss with SACK reneging. Rate samples taken during
+ * a SACK reneging event may overestimate bw by including packets that
+ * were SACKed before the reneg.
+ */
+ if (!rs->prior_mstamp || is_sack_reneg) {
rs->delivered = -1;
rs->interval_us = -1;
return;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index d3ea89020c69..3a81720ac0c4 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -55,7 +55,8 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
* to queuing or delayed ACKs.
*/
reo_wnd = 1000;
- if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) {
+ if ((tp->rack.reord || inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery) &&
+ min_rtt != ~0U) {
reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd);
reo_wnd = min(reo_wnd, tp->srtt_us >> 3);
}
@@ -79,12 +80,12 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
*/
remaining = tp->rack.rtt_us + reo_wnd -
tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
- if (remaining < 0) {
+ if (remaining <= 0) {
tcp_rack_mark_skb_lost(sk, skb);
list_del_init(&skb->tcp_tsorted_anchor);
} else {
- /* Record maximum wait time (+1 to avoid 0) */
- *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining);
+ /* Record maximum wait time */
+ *reo_timeout = max_t(u32, *reo_timeout, remaining);
}
}
}
@@ -116,13 +117,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
{
u32 rtt_us;
- if (tp->rack.mstamp &&
- !tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
- end_seq, tp->rack.end_seq))
- return;
-
rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
- if (sacked & TCPCB_RETRANS) {
+ if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) {
/* If the sacked packet was retransmitted, it's ambiguous
* whether the retransmission or the original (or the prior
* retransmission) was sacked.
@@ -133,13 +129,15 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
* so it's at least one RTT (i.e., retransmission is at least
* an RTT later).
*/
- if (rtt_us < tcp_min_rtt(tp))
- return;
+ return;
}
- tp->rack.rtt_us = rtt_us;
- tp->rack.mstamp = xmit_time;
- tp->rack.end_seq = end_seq;
tp->rack.advanced = 1;
+ tp->rack.rtt_us = rtt_us;
+ if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
+ end_seq, tp->rack.end_seq)) {
+ tp->rack.mstamp = xmit_time;
+ tp->rack.end_seq = end_seq;
+ }
}
/* We have waited long enough to accommodate reordering. Mark the expired
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 16df6dd44b98..71fc60f1b326 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -48,11 +48,19 @@ static void tcp_write_err(struct sock *sk)
* to prevent DoS attacks. It is called when a retransmission timeout
* or zero probe timeout occurs on orphaned socket.
*
+ * Also close if our net namespace is exiting; in that case there is no
+ * hope of ever communicating again since all netns interfaces are already
+ * down (or about to be down), and we need to release our dst references,
+ * which have been moved to the netns loopback interface, so the namespace
+ * can finish exiting. This condition is only possible if we are a kernel
+ * socket, as those do not hold references to the namespace.
+ *
* Criteria is still not confirmed experimentally and may change.
* We kill the socket, if:
* 1. If number of orphaned sockets exceeds an administratively configured
* limit.
* 2. If we have strong memory pressure.
+ * 3. If our net namespace is exiting.
*/
static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
@@ -81,6 +89,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
}
+
+ if (!check_net(sock_net(sk))) {
+ /* Not possible to send reset; just close */
+ tcp_done(sk);
+ return 1;
+ }
+
return 0;
}
@@ -183,11 +198,6 @@ static int tcp_write_timeout(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits) {
dst_negative_advice(sk);
- if (tp->syn_fastopen || tp->syn_data)
- tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
- if (tp->syn_data && icsk->icsk_retransmits == 1)
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPFASTOPENACTIVEFAIL);
} else if (!tp->syn_data && !tp->syn_fastopen) {
sk_rethink_txhash(sk);
}
@@ -195,17 +205,6 @@ static int tcp_write_timeout(struct sock *sk)
expired = icsk->icsk_retransmits >= retry_until;
} else {
if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
- /* Some middle-boxes may black-hole Fast Open _after_
- * the handshake. Therefore we conservatively disable
- * Fast Open on this path on recurring timeouts after
- * successful Fast Open.
- */
- if (tp->syn_data_acked) {
- tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
- if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1)
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPFASTOPENACTIVEFAIL);
- }
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
@@ -228,11 +227,19 @@ static int tcp_write_timeout(struct sock *sk)
expired = retransmits_timed_out(sk, retry_until,
icsk->icsk_user_timeout);
}
+ tcp_fastopen_active_detect_blackhole(sk, expired);
+
+ if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
+ tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
+ icsk->icsk_retransmits,
+ icsk->icsk_rto, (int)expired);
+
if (expired) {
/* Has it gone just too far? */
tcp_write_err(sk);
return 1;
}
+
return 0;
}
@@ -264,6 +271,7 @@ void tcp_delack_timer_handler(struct sock *sk)
icsk->icsk_ack.pingpong = 0;
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
+ tcp_mstamp_refresh(tcp_sk(sk));
tcp_send_ack(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
}
@@ -632,6 +640,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
goto out;
}
+ tcp_mstamp_refresh(tp);
if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
if (tp->linger2 >= 0) {
const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e4ff25c947c5..f81f969f9c06 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -357,18 +357,12 @@ fail:
}
EXPORT_SYMBOL(udp_lib_get_port);
-static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
- unsigned int port)
-{
- return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
-}
-
int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
unsigned int hash2_nulladdr =
- udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
+ ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
unsigned int hash2_partial =
- udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
+ ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -445,7 +439,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
struct sk_buff *skb)
{
struct sock *sk, *result;
- int score, badness, matches = 0, reuseport = 0;
+ int score, badness;
u32 hash = 0;
result = NULL;
@@ -454,23 +448,16 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result)
return result;
- matches = 1;
}
badness = score;
result = sk;
- } else if (score == badness && reuseport) {
- matches++;
- if (reciprocal_scale(hash, matches) == 0)
- result = sk;
- hash = next_pseudo_random32(hash);
}
}
return result;
@@ -488,11 +475,11 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
bool exact_dif = udp_lib_exact_dif_match(net, skb);
- int score, badness, matches = 0, reuseport = 0;
+ int score, badness;
u32 hash = 0;
if (hslot->count > 10) {
- hash2 = udp4_portaddr_hash(net, daddr, hnum);
+ hash2 = ipv4_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
@@ -503,7 +490,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
exact_dif, hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
- hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
+ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
/* avoid searching the same slot again. */
if (unlikely(slot2 == old_slot2))
@@ -526,23 +513,16 @@ begin:
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result)
return result;
- matches = 1;
}
result = sk;
badness = score;
- } else if (score == badness && reuseport) {
- matches++;
- if (reciprocal_scale(hash, matches) == 0)
- result = sk;
- hash = next_pseudo_random32(hash);
}
}
return result;
@@ -997,8 +977,21 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!saddr)
saddr = inet->mc_addr;
connected = 0;
- } else if (!ipc.oif)
+ } else if (!ipc.oif) {
ipc.oif = inet->uc_index;
+ } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
+ /* oif is set, packet is to local broadcast and
+ * and uc_index is set. oif is most likely set
+ * by sk_bound_dev_if. If uc_index != oif check if the
+ * oif is an L3 master and uc_index is an L3 slave.
+ * If so, we want to allow the send using the uc_index.
+ */
+ if (ipc.oif != inet->uc_index &&
+ ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
+ inet->uc_index)) {
+ ipc.oif = inet->uc_index;
+ }
+ }
if (connected)
rt = (struct rtable *)sk_dst_check(sk, 0);
@@ -1775,7 +1768,7 @@ EXPORT_SYMBOL(udp_lib_rehash);
static void udp_v4_rehash(struct sock *sk)
{
- u16 new_hash = udp4_portaddr_hash(sock_net(sk),
+ u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
inet_sk(sk)->inet_rcv_saddr,
inet_sk(sk)->inet_num);
udp_lib_rehash(sk, new_hash);
@@ -1966,9 +1959,9 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct sk_buff *nskb;
if (use_hash2) {
- hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
+ hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
udptable->mask;
- hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask;
+ hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
hslot = &udptable->hash2[hash2];
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
@@ -2200,7 +2193,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
int dif, int sdif)
{
unsigned short hnum = ntohs(loc_port);
- unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
+ unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
@@ -2502,16 +2495,14 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
* but then block when reading it. Add special case code
* to work around these arguably broken applications.
*/
-unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
- unsigned int mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
mask |= POLLIN | POLLRDNORM;
- sock_rps_record_flow(sk);
-
/* Check for false positives due to checksum errors */
if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
@@ -2736,7 +2727,6 @@ int udp4_seq_show(struct seq_file *seq, void *v)
}
static const struct file_operations udp_afinfo_seq_fops = {
- .owner = THIS_MODULE,
.open = udp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 01801b77bd0d..ea6e6e7df0ee 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -203,6 +203,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
goto out;
}
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ goto out;
+
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 59f10fe9782e..f96614e9b9a5 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -75,7 +75,6 @@ static struct inet_protosw udplite4_protosw = {
#ifdef CONFIG_PROC_FS
static const struct file_operations udplite_afinfo_seq_fops = {
- .owner = THIS_MODULE,
.open = udp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index e50b7fea57ee..bcfc00e88756 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
return xfrm4_extract_header(skb);
}
+static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ return dst_input(skb);
+}
+
static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
@@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
iph->tos, skb->dev))
goto drop;
}
- return dst_input(skb);
+
+ if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2))
+ goto drop;
+
+ return 0;
drop:
kfree_skb(skb);
return NET_RX_DROP;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index e6265e2c274e..63faeee989a9 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -62,7 +62,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
- top_iph->ttl = ip4_dst_hoplimit(dst->child);
+ top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));
top_iph->saddr = x->props.saddr.a4;
top_iph->daddr = x->id.daddr.a4;
@@ -92,6 +92,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
@@ -105,18 +106,15 @@ static struct sk_buff *xfrm4_mode_tunnel_gso_segment(struct xfrm_state *x,
{
__skb_push(skb, skb->mac_len);
return skb_mac_gso_segment(skb, features);
-
}
static void xfrm4_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
{
struct xfrm_offload *xo = xfrm_offload(skb);
- if (xo->flags & XFRM_GSO_SEGMENT) {
- skb->network_header = skb->network_header - x->props.header_len;
+ if (xo->flags & XFRM_GSO_SEGMENT)
skb->transport_header = skb->network_header +
sizeof(struct iphdr);
- }
skb_reset_mac_len(skb);
pskb_pull(skb, skb->mac_len + x->props.header_len);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f49bd7897e95..e1846b97ee69 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -186,7 +186,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_work(struct work_struct *w);
-static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id);
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
+ bool send_na);
static void addrconf_dad_run(struct inet6_dev *idev);
static void addrconf_rs_timer(struct timer_list *t);
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -3438,6 +3439,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
} else if (event == NETDEV_CHANGE) {
if (!addrconf_link_ready(dev)) {
/* device is still not ready. */
+ rt6_sync_down_dev(dev, event);
break;
}
@@ -3449,6 +3451,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
* multicast snooping switches
*/
ipv6_mc_up(idev);
+ rt6_sync_up(dev, RTNH_F_LINKDOWN);
break;
}
idev->if_flags |= IF_READY;
@@ -3484,6 +3487,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (run_pending)
addrconf_dad_run(idev);
+ /* Device has an address by now */
+ rt6_sync_up(dev, RTNH_F_DEAD);
+
/*
* If the MTU changed during the interface down,
* when the interface up, the changed MTU must be
@@ -3577,6 +3583,7 @@ static bool addr_is_local(const struct in6_addr *addr)
static int addrconf_ifdown(struct net_device *dev, int how)
{
+ unsigned long event = how ? NETDEV_UNREGISTER : NETDEV_DOWN;
struct net *net = dev_net(dev);
struct inet6_dev *idev;
struct inet6_ifaddr *ifa, *tmp;
@@ -3586,8 +3593,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
ASSERT_RTNL();
- rt6_ifdown(net, dev);
- neigh_ifdown(&nd_tbl, dev);
+ rt6_disable_ip(dev, event);
idev = __in6_dev_get(dev);
if (!idev)
@@ -3833,12 +3839,17 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
idev->cnf.accept_dad < 1) ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
+ bool send_na = false;
+
+ if (ifp->flags & IFA_F_TENTATIVE &&
+ !(ifp->flags & IFA_F_OPTIMISTIC))
+ send_na = true;
bump_id = ifp->flags & IFA_F_TENTATIVE;
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
read_unlock_bh(&idev->lock);
- addrconf_dad_completed(ifp, bump_id);
+ addrconf_dad_completed(ifp, bump_id, send_na);
return;
}
@@ -3967,16 +3978,21 @@ static void addrconf_dad_work(struct work_struct *w)
}
if (ifp->dad_probes == 0) {
+ bool send_na = false;
+
/*
* DAD was successful
*/
+ if (ifp->flags & IFA_F_TENTATIVE &&
+ !(ifp->flags & IFA_F_OPTIMISTIC))
+ send_na = true;
bump_id = ifp->flags & IFA_F_TENTATIVE;
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
write_unlock_bh(&idev->lock);
- addrconf_dad_completed(ifp, bump_id);
+ addrconf_dad_completed(ifp, bump_id, send_na);
goto out;
}
@@ -4014,7 +4030,8 @@ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp)
return true;
}
-static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
+ bool send_na)
{
struct net_device *dev = ifp->idev->dev;
struct in6_addr lladdr;
@@ -4046,6 +4063,16 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
if (send_mld)
ipv6_mc_dad_complete(ifp->idev);
+ /* send unsolicited NA if enabled */
+ if (send_na &&
+ (ifp->idev->cnf.ndisc_notify ||
+ dev_net(dev)->ipv6.devconf_all->ndisc_notify)) {
+ ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr,
+ /*router=*/ !!ifp->idev->cnf.forwarding,
+ /*solicited=*/ false, /*override=*/ true,
+ /*inc_opt=*/ true);
+ }
+
if (send_rs) {
/*
* If a host as already performed a random delay
@@ -4209,7 +4236,6 @@ static int if6_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations if6_fops = {
- .owner = THIS_MODULE,
.open = if6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -4352,9 +4378,11 @@ restart:
spin_lock(&ifpub->lock);
ifpub->regen_count = 0;
spin_unlock(&ifpub->lock);
+ rcu_read_unlock_bh();
ipv6_create_tempaddr(ifpub, ifp, true);
in6_ifa_put(ifpub);
in6_ifa_put(ifp);
+ rcu_read_lock_bh();
goto restart;
}
} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
@@ -6595,27 +6623,45 @@ int __init addrconf_init(void)
rtnl_af_register(&inet6_ops);
- err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
- 0);
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
+ NULL, inet6_dump_ifinfo, 0);
if (err < 0)
goto errout;
- /* Only the first call to __rtnl_register can fail */
- __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
- __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
- __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
- inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED);
- __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
- inet6_dump_ifmcaddr, 0);
- __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
- inet6_dump_ifacaddr, 0);
- __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
- inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED);
-
- ipv6_addr_label_rtnl_register();
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR,
+ inet6_rtm_newaddr, NULL, 0);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR,
+ inet6_rtm_deladdr, NULL, 0);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR,
+ inet6_rtm_getaddr, inet6_dump_ifaddr,
+ RTNL_FLAG_DOIT_UNLOCKED);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST,
+ NULL, inet6_dump_ifmcaddr, 0);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST,
+ NULL, inet6_dump_ifacaddr, 0);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF,
+ inet6_netconf_get_devconf,
+ inet6_netconf_dump_devconf,
+ RTNL_FLAG_DOIT_UNLOCKED);
+ if (err < 0)
+ goto errout;
+ err = ipv6_addr_label_rtnl_register();
+ if (err < 0)
+ goto errout;
return 0;
errout:
+ rtnl_unregister_all(PF_INET6);
rtnl_af_unregister(&inet6_ops);
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 00e1f8ee08f8..1d6ced37ad71 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -547,13 +547,22 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return err;
}
-void __init ipv6_addr_label_rtnl_register(void)
+int __init ipv6_addr_label_rtnl_register(void)
{
- __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
- ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
-}
+ int ret;
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDRLABEL,
+ ip6addrlbl_newdel,
+ NULL, RTNL_FLAG_DOIT_UNLOCKED);
+ if (ret < 0)
+ return ret;
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDRLABEL,
+ ip6addrlbl_newdel,
+ NULL, RTNL_FLAG_DOIT_UNLOCKED);
+ if (ret < 0)
+ return ret;
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
+ ip6addrlbl_get,
+ ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
+ return ret;
+}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f71234b9c..416917719a6f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,6 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->autoflowlabel = ip6_default_np_autolabel(net);
np->repflow = net->ipv6.sysctl.flowlabel_reflect;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
@@ -285,6 +284,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct net *net = sock_net(sk);
__be32 v4addr = 0;
unsigned short snum;
+ bool saved_ipv6only;
int addr_type = 0;
int err = 0;
@@ -390,19 +390,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (!(addr_type & IPV6_ADDR_MULTICAST))
np->saddr = addr->sin6_addr;
+ saved_ipv6only = sk->sk_ipv6only;
+ if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
+ sk->sk_ipv6only = 1;
+
/* Make sure we are allowed to bind here. */
if ((snum || !inet->bind_address_no_port) &&
sk->sk_prot->get_port(sk, snum)) {
+ sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
err = -EADDRINUSE;
goto out;
}
- if (addr_type != IPV6_ADDR_ANY) {
+ if (addr_type != IPV6_ADDR_ANY)
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
- if (addr_type != IPV6_ADDR_MAPPED)
- sk->sk_ipv6only = 1;
- }
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
inet->inet_sport = htons(inet->inet_num);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0bbab8a4b5d8..8e085cc05aeb 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -533,7 +533,6 @@ static int ac6_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations ac6_seq_fops = {
- .owner = THIS_MODULE,
.open = ac6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a1f918713006..fbf08ce3f5ab 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -221,8 +221,7 @@ ipv4_connected:
if (__ipv6_addr_needs_scope_id(addr_type)) {
if (addr_len >= sizeof(struct sockaddr_in6) &&
usin->sin6_scope_id) {
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != usin->sin6_scope_id) {
+ if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) {
err = -EINVAL;
goto out;
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a902ff8f59be..97513f35bcc5 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -141,14 +141,32 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
+ struct xfrm_offload *xo = xfrm_offload(skb);
void *tmp;
- struct dst_entry *dst = skb_dst(skb);
- struct xfrm_state *x = dst->xfrm;
+ struct xfrm_state *x;
+
+ if (xo && (xo->flags & XFRM_DEV_RESUME))
+ x = skb->sp->xvec[skb->sp->len - 1];
+ else
+ x = skb_dst(skb)->xfrm;
tmp = ESP_SKB_CB(skb)->tmp;
esp_ssg_unref(x, tmp);
kfree(tmp);
- xfrm_output_resume(skb, err);
+
+ if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+ if (err) {
+ XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+ kfree_skb(skb);
+ return;
+ }
+
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ secpath_reset(skb);
+ xfrm_dev_resume(skb);
+ } else {
+ xfrm_output_resume(skb, err);
+ }
}
/* Move ESP header back into place. */
@@ -734,17 +752,13 @@ static int esp_init_aead(struct xfrm_state *x)
char aead_name[CRYPTO_MAX_ALG_NAME];
struct crypto_aead *aead;
int err;
- u32 mask = 0;
err = -ENAMETOOLONG;
if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
goto error;
- if (x->xso.offload_handle)
- mask |= CRYPTO_ALG_ASYNC;
-
- aead = crypto_alloc_aead(aead_name, 0, mask);
+ aead = crypto_alloc_aead(aead_name, 0, 0);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -774,7 +788,6 @@ static int esp_init_authenc(struct xfrm_state *x)
char authenc_name[CRYPTO_MAX_ALG_NAME];
unsigned int keylen;
int err;
- u32 mask = 0;
err = -EINVAL;
if (!x->ealg)
@@ -800,10 +813,7 @@ static int esp_init_authenc(struct xfrm_state *x)
goto error;
}
- if (x->xso.offload_handle)
- mask |= CRYPTO_ALG_ASYNC;
-
- aead = crypto_alloc_aead(authenc_name, 0, mask);
+ aead = crypto_alloc_aead(authenc_name, 0, 0);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -890,13 +900,12 @@ static int esp6_init_state(struct xfrm_state *x)
x->props.header_len += IPV4_BEET_PHMAXLEN +
(sizeof(struct ipv6hdr) - sizeof(struct iphdr));
break;
+ default:
case XFRM_MODE_TRANSPORT:
break;
case XFRM_MODE_TUNNEL:
x->props.header_len += sizeof(struct ipv6hdr);
break;
- default:
- goto error;
}
align = ALIGN(crypto_aead_blocksize(aead), 4);
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 333a478aa161..3fd1ec775dc2 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -60,7 +60,8 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
int nhoff;
int err;
- skb_pull(skb, offset);
+ if (!pskb_pull(skb, offset))
+ return NULL;
if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
goto out;
@@ -135,75 +136,39 @@ static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
- __u32 seq;
- int err = 0;
- struct sk_buff *skb2;
struct xfrm_state *x;
struct ip_esp_hdr *esph;
struct crypto_aead *aead;
- struct sk_buff *segs = ERR_PTR(-EINVAL);
netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
if (!xo)
- goto out;
+ return ERR_PTR(-EINVAL);
- seq = xo->seq.low;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
+ return ERR_PTR(-EINVAL);
x = skb->sp->xvec[skb->sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);
if (esph->spi != x->id.spi)
- goto out;
+ return ERR_PTR(-EINVAL);
if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
- goto out;
+ return ERR_PTR(-EINVAL);
__skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
skb->encap_hdr_csum = 1;
- if (!(features & NETIF_F_HW_ESP))
+ if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
+ (x->xso.dev != skb->dev))
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
- segs = x->outer_mode->gso_segment(x, skb, esp_features);
- if (IS_ERR_OR_NULL(segs))
- goto out;
-
- __skb_pull(skb, skb->data - skb_mac_header(skb));
+ xo->flags |= XFRM_GSO_SEGMENT;
- skb2 = segs;
- do {
- struct sk_buff *nskb = skb2->next;
-
- xo = xfrm_offload(skb2);
- xo->flags |= XFRM_GSO_SEGMENT;
- xo->seq.low = seq;
- xo->seq.hi = xfrm_replay_seqhi(x, seq);
-
- if(!(features & NETIF_F_HW_ESP))
- xo->flags |= CRYPTO_FALLBACK;
-
- x->outer_mode->xmit(x, skb2);
-
- err = x->type_offload->xmit(x, skb2, esp_features);
- if (err) {
- kfree_skb_list(segs);
- return ERR_PTR(err);
- }
-
- if (!skb_is_gso(skb2))
- seq++;
- else
- seq += skb_shinfo(skb2)->gso_segs;
-
- skb_push(skb2, skb2->mac_len);
- skb2 = nskb;
- } while (skb2);
-
-out:
- return segs;
+ return x->outer_mode->gso_segment(x, skb, esp_features);
}
static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
@@ -222,6 +187,7 @@ static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features)
{
+ int len;
int err;
int alen;
int blksize;
@@ -230,6 +196,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
struct crypto_aead *aead;
struct esp_info esp;
bool hw_offload = true;
+ __u32 seq;
esp.inplace = true;
@@ -265,28 +232,33 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
return esp.nfrags;
}
+ seq = xo->seq.low;
+
esph = ip_esp_hdr(skb);
esph->spi = x->id.spi;
skb_push(skb, -skb_network_offset(skb));
if (xo->flags & XFRM_GSO_SEGMENT) {
- esph->seq_no = htonl(xo->seq.low);
- } else {
- int len;
-
- len = skb->len - sizeof(struct ipv6hdr);
- if (len > IPV6_MAXPLEN)
- len = 0;
+ esph->seq_no = htonl(seq);
- ipv6_hdr(skb)->payload_len = htons(len);
+ if (!skb_is_gso(skb))
+ xo->seq.low++;
+ else
+ xo->seq.low += skb_shinfo(skb)->gso_segs;
}
+ esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+
+ len = skb->len - sizeof(struct ipv6hdr);
+ if (len > IPV6_MAXPLEN)
+ len = 0;
+
+ ipv6_hdr(skb)->payload_len = htons(len);
+
if (hw_offload)
return 0;
- esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
-
err = esp6_output_tail(x, skb, &esp);
if (err)
return err;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 83bd75713535..bc68eb661970 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -925,6 +925,15 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
sr_phdr->segments[0] = **addr_p;
*addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
+ if (sr_ihdr->hdrlen > hops * 2) {
+ int tlvs_offset, tlvs_length;
+
+ tlvs_offset = (1 + hops * 2) << 3;
+ tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3;
+ memcpy((char *)sr_phdr + tlvs_offset,
+ (char *)sr_ihdr + tlvs_offset, tlvs_length);
+ }
+
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(sr_phdr)) {
struct net *net = NULL;
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 6eb5e68f112a..44c39c5f0638 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -512,9 +512,7 @@ static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct ila_map *ila;
int ret;
- ret = rhashtable_walk_start(rhiter);
- if (ret && ret != -EAGAIN)
- goto done;
+ rhashtable_walk_start(rhiter);
for (;;) {
ila = rhashtable_walk_next(rhiter);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b01858f5deb1..2febe26de6a1 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -125,6 +125,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
}
/* called with rcu_read_lock() */
+static struct sock *inet6_lhash2_lookup(struct net *net,
+ struct inet_listen_hashbucket *ilb2,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport, const struct in6_addr *daddr,
+ const unsigned short hnum, const int dif, const int sdif)
+{
+ bool exact_dif = inet6_exact_dif_match(net, skb);
+ struct inet_connection_sock *icsk;
+ struct sock *sk, *result = NULL;
+ int score, hiscore = 0;
+ u32 phash = 0;
+
+ inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
+ sk = (struct sock *)icsk;
+ score = compute_score(sk, net, hnum, daddr, dif, sdif,
+ exact_dif);
+ if (score > hiscore) {
+ if (sk->sk_reuseport) {
+ phash = inet6_ehashfn(net, daddr, hnum,
+ saddr, sport);
+ result = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (result)
+ return result;
+ }
+ result = sk;
+ hiscore = score;
+ }
+ }
+
+ return result;
+}
+
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -134,31 +168,56 @@ struct sock *inet6_lookup_listener(struct net *net,
{
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
- int score, hiscore = 0, matches = 0, reuseport = 0;
bool exact_dif = inet6_exact_dif_match(net, skb);
+ struct inet_listen_hashbucket *ilb2;
struct sock *sk, *result = NULL;
+ int score, hiscore = 0;
+ unsigned int hash2;
u32 phash = 0;
+ if (ilb->count <= 10 || !hashinfo->lhash2)
+ goto port_lookup;
+
+ /* Too many sk in the ilb bucket (which is hashed by port alone).
+ * Try lhash2 (which is hashed by port and addr) instead.
+ */
+
+ hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+ ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+ if (ilb2->count > ilb->count)
+ goto port_lookup;
+
+ result = inet6_lhash2_lookup(net, ilb2, skb, doff,
+ saddr, sport, daddr, hnum,
+ dif, sdif);
+ if (result)
+ return result;
+
+ /* Lookup lhash2 with in6addr_any */
+
+ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
+ ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+ if (ilb2->count > ilb->count)
+ goto port_lookup;
+
+ return inet6_lhash2_lookup(net, ilb2, skb, doff,
+ saddr, sport, daddr, hnum,
+ dif, sdif);
+
+port_lookup:
sk_for_each(sk, &ilb->head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
if (score > hiscore) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
phash = inet6_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, phash,
skb, doff);
if (result)
return result;
- matches = 1;
}
result = sk;
hiscore = score;
- } else if (score == hiscore && reuseport) {
- matches++;
- if (reciprocal_scale(phash, matches) == 0)
- result = sk;
- phash = next_pseudo_random32(phash);
}
}
return result;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f5285f4e1d08..92b8d8c75eed 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -107,16 +107,13 @@ enum {
void fib6_update_sernum(struct rt6_info *rt)
{
- struct fib6_table *table = rt->rt6i_table;
struct net *net = dev_net(rt->dst.dev);
struct fib6_node *fn;
- spin_lock_bh(&table->tb6_lock);
fn = rcu_dereference_protected(rt->rt6i_node,
- lockdep_is_held(&table->tb6_lock));
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
if (fn)
fn->fn_sernum = fib6_new_sernum(net);
- spin_unlock_bh(&table->tb6_lock);
}
/*
@@ -640,6 +637,11 @@ static struct fib6_node *fib6_add_1(struct net *net,
if (!(fn->fn_flags & RTN_RTINFO)) {
RCU_INIT_POINTER(fn->leaf, NULL);
rt6_release(leaf);
+ /* remove null_entry in the root node */
+ } else if (fn->fn_flags & RTN_TL_ROOT &&
+ rcu_access_pointer(fn->leaf) ==
+ net->ipv6.ip6_null_entry) {
+ RCU_INIT_POINTER(fn->leaf, NULL);
}
return fn;
@@ -799,12 +801,6 @@ insert_above:
return ln;
}
-static bool rt6_qualify_for_ecmp(struct rt6_info *rt)
-{
- return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
- RTF_GATEWAY;
-}
-
static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
{
int i;
@@ -893,7 +889,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
ins = &fn->leaf;
for (iter = leaf; iter;
- iter = rcu_dereference_protected(iter->dst.rt6_next,
+ iter = rcu_dereference_protected(iter->rt6_next,
lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
/*
* Search for duplicates
@@ -950,7 +946,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
break;
next_iter:
- ins = &iter->dst.rt6_next;
+ ins = &iter->rt6_next;
}
if (fallback_ins && !found) {
@@ -979,7 +975,7 @@ next_iter:
&sibling->rt6i_siblings);
break;
}
- sibling = rcu_dereference_protected(sibling->dst.rt6_next,
+ sibling = rcu_dereference_protected(sibling->rt6_next,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
}
/* For each sibling in the list, increment the counter of
@@ -994,6 +990,7 @@ next_iter:
rt6i_nsiblings++;
}
BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
+ rt6_multipath_rebalance(temp_sibling);
}
/*
@@ -1009,7 +1006,7 @@ add:
if (err)
return err;
- rcu_assign_pointer(rt->dst.rt6_next, iter);
+ rcu_assign_pointer(rt->rt6_next, iter);
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rcu_assign_pointer(*ins, rt);
@@ -1040,7 +1037,7 @@ add:
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
- rt->dst.rt6_next = iter->dst.rt6_next;
+ rt->rt6_next = iter->rt6_next;
rcu_assign_pointer(*ins, rt);
call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
rt, extack);
@@ -1059,14 +1056,14 @@ add:
if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
- ins = &rt->dst.rt6_next;
+ ins = &rt->rt6_next;
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
while (iter) {
if (iter->rt6i_metric > rt->rt6i_metric)
break;
if (rt6_qualify_for_ecmp(iter)) {
- *ins = iter->dst.rt6_next;
+ *ins = iter->rt6_next;
iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
if (rcu_access_pointer(fn->rr_ptr) == iter)
@@ -1075,7 +1072,7 @@ add:
nsiblings--;
info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
} else {
- ins = &iter->dst.rt6_next;
+ ins = &iter->rt6_next;
}
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@ -1102,8 +1099,8 @@ void fib6_force_start_gc(struct net *net)
jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
}
-static void fib6_update_sernum_upto_root(struct rt6_info *rt,
- int sernum)
+static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
+ int sernum)
{
struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@ -1117,6 +1114,11 @@ static void fib6_update_sernum_upto_root(struct rt6_info *rt,
}
}
+void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
+{
+ __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
+}
+
/*
* Add routing information to the routing tree.
* <destination addr>/<source addr>
@@ -1221,8 +1223,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
}
if (!rcu_access_pointer(fn->leaf)) {
- atomic_inc(&rt->rt6i_ref);
- rcu_assign_pointer(fn->leaf, rt);
+ if (fn->fn_flags & RTN_TL_ROOT) {
+ /* put back null_entry for root node */
+ rcu_assign_pointer(fn->leaf,
+ info->nl_net->ipv6.ip6_null_entry);
+ } else {
+ atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, rt);
+ }
}
fn = sn;
}
@@ -1230,7 +1238,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
err = fib6_add_rt2node(fn, rt, info, mxc, extack);
if (!err) {
- fib6_update_sernum_upto_root(rt, sernum);
+ __fib6_update_sernum_upto_root(rt, sernum);
fib6_start_gc(info->nl_net, rt);
}
@@ -1241,23 +1249,28 @@ out:
* If fib6_add_1 has cleared the old leaf pointer in the
* super-tree leaf node we have to find a new one for it.
*/
- struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
- lockdep_is_held(&table->tb6_lock));
- if (pn != fn && pn_leaf == rt) {
- pn_leaf = NULL;
- RCU_INIT_POINTER(pn->leaf, NULL);
- atomic_dec(&rt->rt6i_ref);
- }
- if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
- pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
-#if RT6_DEBUG >= 2
- if (!pn_leaf) {
- WARN_ON(!pn_leaf);
- pn_leaf = info->nl_net->ipv6.ip6_null_entry;
+ if (pn != fn) {
+ struct rt6_info *pn_leaf =
+ rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (pn_leaf == rt) {
+ pn_leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
+ atomic_dec(&rt->rt6i_ref);
}
+ if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+ pn_leaf = fib6_find_prefix(info->nl_net, table,
+ pn);
+#if RT6_DEBUG >= 2
+ if (!pn_leaf) {
+ WARN_ON(!pn_leaf);
+ pn_leaf =
+ info->nl_net->ipv6.ip6_null_entry;
+ }
#endif
- atomic_inc(&pn_leaf->rt6i_ref);
- rcu_assign_pointer(pn->leaf, pn_leaf);
+ atomic_inc(&pn_leaf->rt6i_ref);
+ rcu_assign_pointer(pn->leaf, pn_leaf);
+ }
}
#endif
goto failure;
@@ -1265,13 +1278,17 @@ out:
return err;
failure:
- /* fn->leaf could be NULL if fn is an intermediate node and we
- * failed to add the new route to it in both subtree creation
- * failure and fib6_add_rt2node() failure case.
- * In both cases, fib6_repair_tree() should be called to fix
- * fn->leaf.
+ /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
+ * 1. fn is an intermediate node and we failed to add the new
+ * route to it in both subtree creation failure and fib6_add_rt2node()
+ * failure case.
+ * 2. fn is the root node in the table and we fail to add the first
+ * default route to it.
*/
- if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+ if (fn &&
+ (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
+ (fn->fn_flags & RTN_TL_ROOT &&
+ !rcu_access_pointer(fn->leaf))))
fib6_repair_tree(info->nl_net, table, fn);
/* Always release dst as dst->__refcnt is guaranteed
* to be taken before entering this function
@@ -1526,6 +1543,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
struct fib6_walker *w;
int iter = 0;
+ /* Set fn->leaf to null_entry for root node. */
+ if (fn->fn_flags & RTN_TL_ROOT) {
+ rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
+ return fn;
+ }
+
for (;;) {
struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
lockdep_is_held(&table->tb6_lock));
@@ -1644,7 +1667,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
/* Unlink it */
- *rtp = rt->dst.rt6_next;
+ *rtp = rt->rt6_next;
rt->rt6i_node = NULL;
net->ipv6.rt6_stats->fib_rt_entries--;
net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1665,6 +1688,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
sibling->rt6i_nsiblings--;
rt->rt6i_nsiblings = 0;
list_del_init(&rt->rt6i_siblings);
+ rt6_multipath_rebalance(next_sibling);
}
/* Adjust walkers */
@@ -1672,7 +1696,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
RT6_TRACE("walker %p adjusted by delroute\n", w);
- w->leaf = rcu_dereference_protected(rt->dst.rt6_next,
+ w->leaf = rcu_dereference_protected(rt->rt6_next,
lockdep_is_held(&table->tb6_lock));
if (!w->leaf)
w->state = FWS_U;
@@ -1680,10 +1704,15 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
}
read_unlock(&net->ipv6.fib6_walker_lock);
- /* If it was last route, expunge its radix tree node */
+ /* If it was last route, call fib6_repair_tree() to:
+ * 1. For root node, put back null_entry as how the table was created.
+ * 2. For other nodes, expunge its radix tree node.
+ */
if (!rcu_access_pointer(fn->leaf)) {
- fn->fn_flags &= ~RTN_RTINFO;
- net->ipv6.rt6_stats->fib_route_nodes--;
+ if (!(fn->fn_flags & RTN_TL_ROOT)) {
+ fn->fn_flags &= ~RTN_RTINFO;
+ net->ipv6.rt6_stats->fib_route_nodes--;
+ }
fn = fib6_repair_tree(net, table, fn);
}
@@ -1731,7 +1760,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
fib6_del_route(table, fn, rtp, info);
return 0;
}
- rtp_next = &cur->dst.rt6_next;
+ rtp_next = &cur->rt6_next;
}
return -ENOENT;
}
@@ -1887,7 +1916,7 @@ static int fib6_clean_node(struct fib6_walker *w)
for_each_fib6_walker_rt(w) {
res = c->func(rt, c->arg);
- if (res < 0) {
+ if (res == -1) {
w->leaf = rt;
res = fib6_del(rt, &info);
if (res) {
@@ -1900,6 +1929,12 @@ static int fib6_clean_node(struct fib6_walker *w)
continue;
}
return 0;
+ } else if (res == -2) {
+ if (WARN_ON(!rt->rt6i_nsiblings))
+ continue;
+ rt = list_last_entry(&rt->rt6i_siblings,
+ struct rt6_info, rt6i_siblings);
+ continue;
}
WARN_ON(res != 0);
}
@@ -1911,7 +1946,8 @@ static int fib6_clean_node(struct fib6_walker *w)
* Convenient frontend to tree walker.
*
* func is called on each route.
- * It may return -1 -> delete this route.
+ * It may return -2 -> skip multipath route.
+ * -1 -> delete this route.
* 0 -> continue walking
*/
@@ -2103,7 +2139,6 @@ static void fib6_net_exit(struct net *net)
{
unsigned int i;
- rt6_ifdown(net, NULL);
del_timer_sync(&net->ipv6.ip6_fib_timer);
for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
@@ -2142,8 +2177,8 @@ int __init fib6_init(void)
if (ret)
goto out_kmem_cache_create;
- ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
- 0);
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
+ inet6_dump_fib, 0);
if (ret)
goto out_unregister_subsys;
@@ -2208,7 +2243,7 @@ static int ipv6_route_yield(struct fib6_walker *w)
do {
iter->w.leaf = rcu_dereference_protected(
- iter->w.leaf->dst.rt6_next,
+ iter->w.leaf->rt6_next,
lockdep_is_held(&iter->tbl->tb6_lock));
iter->skip--;
if (!iter->skip && iter->w.leaf)
@@ -2274,7 +2309,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!v)
goto iter_table;
- n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next);
+ n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next);
if (n) {
++*pos;
return n;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 7f59c8fabeeb..3dab664ff503 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -836,7 +836,6 @@ static int ip6fl_seq_release(struct inode *inode, struct file *file)
}
static const struct file_operations ip6fl_seq_fops = {
- .owner = THIS_MODULE,
.open = ip6fl_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4cfd8e0696fe..05f070e123e4 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -55,6 +55,8 @@
#include <net/ip6_route.h>
#include <net/ip6_tunnel.h>
#include <net/gre.h>
+#include <net/erspan.h>
+#include <net/dst_metadata.h>
static bool log_ecn_error = true;
@@ -68,11 +70,13 @@ static unsigned int ip6gre_net_id __read_mostly;
struct ip6gre_net {
struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
+ struct ip6_tnl __rcu *collect_md_tun;
struct net_device *fb_tunnel_dev;
};
static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
+static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly;
static int ip6gre_tunnel_init(struct net_device *dev);
static void ip6gre_tunnel_setup(struct net_device *dev);
static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
@@ -121,7 +125,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
unsigned int h1 = HASH_KEY(key);
struct ip6_tnl *t, *cand = NULL;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
- int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+ int dev_type = (gre_proto == htons(ETH_P_TEB) ||
+ gre_proto == htons(ETH_P_ERSPAN)) ?
ARPHRD_ETHER : ARPHRD_IP6GRE;
int score, cand_score = 4;
@@ -226,6 +231,10 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
if (cand)
return cand;
+ t = rcu_dereference(ign->collect_md_tun);
+ if (t && t->dev->flags & IFF_UP)
+ return t;
+
dev = ign->fb_tunnel_dev;
if (dev->flags & IFF_UP)
return netdev_priv(dev);
@@ -261,6 +270,9 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
{
struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun, t);
+
rcu_assign_pointer(t->next, rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t);
}
@@ -270,6 +282,9 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
struct ip6_tnl __rcu **tp;
struct ip6_tnl *iter;
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun, NULL);
+
for (tp = ip6gre_bucket(ign, t);
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
@@ -337,11 +352,12 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
nt->dev = dev;
nt->net = dev_net(dev);
- ip6gre_tnl_link_config(nt, 1);
if (register_netdevice(dev) < 0)
goto failed_free;
+ ip6gre_tnl_link_config(nt, 1);
+
/* Can use a lockless transmit, unless we generate output sequences */
if (!(nt->parms.o_flags & TUNNEL_SEQ))
dev->features |= NETIF_F_LLTX;
@@ -460,7 +476,101 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
&ipv6h->saddr, &ipv6h->daddr, tpi->key,
tpi->proto);
if (tunnel) {
- ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ if (tunnel->parms.collect_md) {
+ struct metadata_dst *tun_dst;
+ __be64 tun_id;
+ __be16 flags;
+
+ flags = tpi->flags;
+ tun_id = key32_to_tunnel_id(tpi->key);
+
+ tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0);
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+ } else {
+ ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ }
+
+ return PACKET_RCVD;
+ }
+
+ return PACKET_REJECT;
+}
+
+static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
+ struct tnl_ptk_info *tpi)
+{
+ struct erspan_base_hdr *ershdr;
+ struct erspan_metadata *pkt_md;
+ const struct ipv6hdr *ipv6h;
+ struct ip6_tnl *tunnel;
+ u8 ver;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+ return PACKET_REJECT;
+
+ ipv6h = ipv6_hdr(skb);
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ ver = ershdr->ver;
+ tpi->key = cpu_to_be32(get_session_id(ershdr));
+
+ tunnel = ip6gre_tunnel_lookup(skb->dev,
+ &ipv6h->saddr, &ipv6h->daddr, tpi->key,
+ tpi->proto);
+ if (tunnel) {
+ int len = erspan_hdr_len(ver);
+
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return PACKET_REJECT;
+
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ pkt_md = (struct erspan_metadata *)(ershdr + 1);
+
+ if (__iptunnel_pull_header(skb, len,
+ htons(ETH_P_TEB),
+ false, false) < 0)
+ return PACKET_REJECT;
+
+ if (tunnel->parms.collect_md) {
+ struct metadata_dst *tun_dst;
+ struct ip_tunnel_info *info;
+ struct erspan_metadata *md;
+ __be64 tun_id;
+ __be16 flags;
+
+ tpi->flags |= TUNNEL_KEY;
+ flags = tpi->flags;
+ tun_id = key32_to_tunnel_id(tpi->key);
+
+ tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id,
+ sizeof(*md));
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ info = &tun_dst->u.tun_info;
+ md = ip_tunnel_info_opts(info);
+
+ memcpy(md, pkt_md, sizeof(*md));
+ md->version = ver;
+ info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ info->options_len = sizeof(*md);
+
+ ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+
+ } else {
+ tunnel->parms.erspan_ver = ver;
+
+ if (ver == 1) {
+ tunnel->parms.index = ntohl(pkt_md->u.index);
+ } else {
+ tunnel->parms.dir = pkt_md->u.md2.dir;
+ tunnel->parms.hwid = get_hwid(&pkt_md->u.md2);
+ }
+
+ ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ }
return PACKET_RCVD;
}
@@ -481,9 +591,17 @@ static int gre_rcv(struct sk_buff *skb)
if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
goto drop;
+ if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
+ tpi.proto == htons(ETH_P_ERSPAN2))) {
+ if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
+ return 0;
+ goto out;
+ }
+
if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
return 0;
+out:
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
drop:
kfree_skb(skb);
@@ -496,6 +614,78 @@ static int gre_handle_offloads(struct sk_buff *skb, bool csum)
csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
+static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb,
+ struct net_device *dev,
+ struct flowi6 *fl6, __u8 *dsfield,
+ int *encap_limit)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ *encap_limit = t->parms.encap_limit;
+
+ memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ *dsfield = ipv4_get_dsfield(iph);
+ else
+ *dsfield = ip6_tclass(t->parms.flowinfo);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6->flowi6_mark = skb->mark;
+ else
+ fl6->flowi6_mark = t->parms.fwmark;
+
+ fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+}
+
+static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
+ struct net_device *dev,
+ struct flowi6 *fl6, __u8 *dsfield,
+ int *encap_limit)
+{
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ip6_tnl *t = netdev_priv(dev);
+ __u16 offset;
+
+ offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+
+ if (offset > 0) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+
+ tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+ if (tel->encap_limit == 0) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2);
+ return -1;
+ }
+ *encap_limit = tel->encap_limit - 1;
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+ *encap_limit = t->parms.encap_limit;
+ }
+
+ memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ *dsfield = ipv6_get_dsfield(ipv6h);
+ else
+ *dsfield = ip6_tclass(t->parms.flowinfo);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ fl6->flowlabel |= ip6_flowlabel(ipv6h);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6->flowi6_mark = skb->mark;
+ else
+ fl6->flowi6_mark = t->parms.fwmark;
+
+ fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+ return 0;
+}
+
static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
struct net_device *dev, __u8 dsfield,
struct flowi6 *fl6, int encap_limit,
@@ -517,8 +707,38 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
- gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
- protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+
+ if (tunnel->parms.collect_md) {
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ __be16 flags;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info ||
+ !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET6))
+ return -EINVAL;
+
+ key = &tun_info->key;
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->flowi6_proto = IPPROTO_GRE;
+ fl6->daddr = key->u.ipv6.dst;
+ fl6->flowlabel = key->label;
+ fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+ dsfield = key->tos;
+ flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ tunnel->tun_hlen = gre_calc_hlen(flags);
+
+ gre_build_header(skb, tunnel->tun_hlen,
+ flags, protocol,
+ tunnel_id_to_key32(tun_info->key.tun_id), 0);
+
+ } else {
+ gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ protocol, tunnel->parms.o_key,
+ htonl(tunnel->o_seqno));
+ }
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE);
@@ -527,30 +747,17 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- const struct iphdr *iph = ip_hdr(skb);
int encap_limit = -1;
struct flowi6 fl6;
- __u8 dsfield;
+ __u8 dsfield = 0;
__u32 mtu;
int err;
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
-
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- dsfield = ipv4_get_dsfield(iph);
- else
- dsfield = ip6_tclass(t->parms.flowinfo);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6.flowi6_mark = skb->mark;
- else
- fl6.flowi6_mark = t->parms.fwmark;
-
- fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ if (!t->parms.collect_md)
+ prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
+ &dsfield, &encap_limit);
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
@@ -574,46 +781,17 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
int encap_limit = -1;
- __u16 offset;
struct flowi6 fl6;
- __u8 dsfield;
+ __u8 dsfield = 0;
__u32 mtu;
int err;
if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
return -1;
- offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
- /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
- ipv6h = ipv6_hdr(skb);
-
- if (offset > 0) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
- if (tel->encap_limit == 0) {
- icmpv6_send(skb, ICMPV6_PARAMPROB,
- ICMPV6_HDR_FIELD, offset + 2);
- return -1;
- }
- encap_limit = tel->encap_limit - 1;
- } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
-
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- dsfield = ipv6_get_dsfield(ipv6h);
- else
- dsfield = ip6_tclass(t->parms.flowinfo);
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
- fl6.flowlabel |= ip6_flowlabel(ipv6h);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6.flowi6_mark = skb->mark;
- else
- fl6.flowi6_mark = t->parms.fwmark;
-
- fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ if (!t->parms.collect_md &&
+ prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
+ return -1;
if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
return -1;
@@ -660,7 +838,8 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ if (!t->parms.collect_md)
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
@@ -705,6 +884,137 @@ tx_err:
return NETDEV_TX_OK;
}
+static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device_stats *stats;
+ bool truncate = false;
+ int encap_limit = -1;
+ __u8 dsfield = false;
+ struct flowi6 fl6;
+ int err = -EINVAL;
+ __u32 mtu;
+
+ if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
+ goto tx_err;
+
+ if (gre_handle_offloads(skb, false))
+ goto tx_err;
+
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+ pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ truncate = true;
+ }
+
+ t->parms.o_flags &= ~TUNNEL_KEY;
+ IPCB(skb)->flags = 0;
+
+ /* For collect_md mode, derive fl6 from the tunnel key,
+ * for native mode, call prepare_ip6gre_xmit_{ipv4,ipv6}.
+ */
+ if (t->parms.collect_md) {
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ struct erspan_metadata *md;
+ __be32 tun_id;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info ||
+ !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET6))
+ return -EINVAL;
+
+ key = &tun_info->key;
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_GRE;
+ fl6.daddr = key->u.ipv6.dst;
+ fl6.flowlabel = key->label;
+ fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+ dsfield = key->tos;
+ md = ip_tunnel_info_opts(tun_info);
+ if (!md)
+ goto tx_err;
+
+ tun_id = tunnel_id_to_key32(key->tun_id);
+ if (md->version == 1) {
+ erspan_build_header(skb,
+ ntohl(tun_id),
+ ntohl(md->u.index), truncate,
+ false);
+ } else if (md->version == 2) {
+ erspan_build_header_v2(skb,
+ ntohl(tun_id),
+ md->u.md2.dir,
+ get_hwid(&md->u.md2),
+ truncate, false);
+ }
+ } else {
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
+ &dsfield, &encap_limit);
+ break;
+ case htons(ETH_P_IPV6):
+ if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+ goto tx_err;
+ if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6,
+ &dsfield, &encap_limit))
+ goto tx_err;
+ break;
+ default:
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ break;
+ }
+
+ if (t->parms.erspan_ver == 1)
+ erspan_build_header(skb, ntohl(t->parms.o_key),
+ t->parms.index,
+ truncate, false);
+ else
+ erspan_build_header_v2(skb, ntohl(t->parms.o_key),
+ t->parms.dir,
+ t->parms.hwid,
+ truncate, false);
+ fl6.daddr = t->parms.raddr;
+ }
+
+ /* Push GRE header. */
+ gre_build_header(skb, 8, TUNNEL_SEQ,
+ htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++));
+
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ NEXTHDR_GRE);
+ if (err != 0) {
+ /* XXX: send ICMP error even if DF is not set. */
+ if (err == -EMSGSIZE) {
+ if (skb->protocol == htons(ETH_P_IP))
+ icmp_send(skb, ICMP_DEST_UNREACH,
+ ICMP_FRAG_NEEDED, htonl(mtu));
+ else
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ }
+
+ goto tx_err;
+ }
+ return NETDEV_TX_OK;
+
+tx_err:
+ stats = &t->dev->stats;
+ stats->tx_errors++;
+ stats->tx_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
{
struct net_device *dev = t->dev;
@@ -1014,6 +1324,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
eth_random_addr(dev->perm_addr);
}
+#define GRE6_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_HW_CSUM)
+
+static void ip6gre_tnl_init_features(struct net_device *dev)
+{
+ struct ip6_tnl *nt = netdev_priv(dev);
+
+ dev->features |= GRE6_FEATURES;
+ dev->hw_features |= GRE6_FEATURES;
+
+ if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
+ /* TCP offload with GRE SEQ is not supported, nor
+ * can we support 2 levels of outer headers requiring
+ * an update.
+ */
+ if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
+ nt->encap.type == TUNNEL_ENCAP_NONE) {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ }
+
+ /* Can use a lockless transmit, unless we generate
+ * output sequences
+ */
+ dev->features |= NETIF_F_LLTX;
+ }
+}
+
static int ip6gre_tunnel_init_common(struct net_device *dev)
{
struct ip6_tnl *tunnel;
@@ -1048,6 +1388,12 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
+ if (tunnel->parms.collect_md) {
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ netif_keep_dst(dev);
+ }
+ ip6gre_tnl_init_features(dev);
+
return 0;
}
@@ -1062,6 +1408,9 @@ static int ip6gre_tunnel_init(struct net_device *dev)
tunnel = netdev_priv(dev);
+ if (tunnel->parms.collect_md)
+ return 0;
+
memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
@@ -1084,7 +1433,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
dev_hold(dev);
}
-
static struct inet6_protocol ip6gre_protocol __read_mostly = {
.handler = gre_rcv,
.err_handler = ip6gre_err,
@@ -1099,7 +1447,8 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
for_each_netdev_safe(net, dev, aux)
if (dev->rtnl_link_ops == &ip6gre_link_ops ||
- dev->rtnl_link_ops == &ip6gre_tap_ops)
+ dev->rtnl_link_ops == &ip6gre_tap_ops ||
+ dev->rtnl_link_ops == &ip6erspan_tap_ops)
unregister_netdevice_queue(dev, head);
for (prio = 0; prio < 4; prio++) {
@@ -1221,6 +1570,70 @@ out:
return ip6gre_tunnel_validate(tb, data, extack);
}
+static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ __be16 flags = 0;
+ int ret, ver = 0;
+
+ if (!data)
+ return 0;
+
+ ret = ip6gre_tap_validate(tb, data, extack);
+ if (ret)
+ return ret;
+
+ /* ERSPAN should only have GRE sequence and key flag */
+ if (data[IFLA_GRE_OFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ if (data[IFLA_GRE_IFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ if (!data[IFLA_GRE_COLLECT_METADATA] &&
+ flags != (GRE_SEQ | GRE_KEY))
+ return -EINVAL;
+
+ /* ERSPAN Session ID only has 10-bit. Since we reuse
+ * 32-bit key field as ID, check it's range.
+ */
+ if (data[IFLA_GRE_IKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ if (data[IFLA_GRE_OKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ if (data[IFLA_GRE_ERSPAN_VER]) {
+ ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+ if (ver != 1 && ver != 2)
+ return -EINVAL;
+ }
+
+ if (ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX]) {
+ u32 index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+
+ if (index & ~INDEX_MASK)
+ return -EINVAL;
+ }
+ } else if (ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR]) {
+ u16 dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+
+ if (dir & ~(DIR_MASK >> DIR_OFFSET))
+ return -EINVAL;
+ }
+
+ if (data[IFLA_GRE_ERSPAN_HWID]) {
+ u16 hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+
+ if (hwid & ~(HWID_MASK >> HWID_OFFSET))
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
static void ip6gre_netlink_parms(struct nlattr *data[],
struct __ip6_tnl_parm *parms)
@@ -1267,11 +1680,26 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_FWMARK])
parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
+
+ if (data[IFLA_GRE_COLLECT_METADATA])
+ parms->collect_md = true;
+
+ if (data[IFLA_GRE_ERSPAN_VER])
+ parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+
+ if (parms->erspan_ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX])
+ parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ } else if (parms->erspan_ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR])
+ parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ if (data[IFLA_GRE_ERSPAN_HWID])
+ parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ }
}
static int ip6gre_tap_init(struct net_device *dev)
{
- struct ip6_tnl *tunnel;
int ret;
ret = ip6gre_tunnel_init_common(dev);
@@ -1280,10 +1708,6 @@ static int ip6gre_tap_init(struct net_device *dev)
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- tunnel = netdev_priv(dev);
-
- ip6gre_tnl_link_config(tunnel, 1);
-
return 0;
}
@@ -1298,16 +1722,65 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_get_iflink = ip6_tnl_get_iflink,
};
-#define GRE6_FEATURES (NETIF_F_SG | \
- NETIF_F_FRAGLIST | \
- NETIF_F_HIGHDMA | \
- NETIF_F_HW_CSUM)
+static int ip6erspan_tap_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel;
+ int t_hlen;
+ int ret;
+
+ tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
+ strcpy(tunnel->parms.name, dev->name);
+
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
+ if (ret) {
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+ return ret;
+ }
+
+ tunnel->tun_hlen = 8;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+ erspan_hdr_len(tunnel->parms.erspan_ver);
+ t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+
+ dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ dev->mtu = ETH_DATA_LEN - t_hlen;
+ if (dev->type == ARPHRD_ETHER)
+ dev->mtu -= ETH_HLEN;
+ if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu -= 8;
+
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ tunnel = netdev_priv(dev);
+ ip6gre_tnl_link_config(tunnel, 1);
+
+ return 0;
+}
+
+static const struct net_device_ops ip6erspan_netdev_ops = {
+ .ndo_init = ip6erspan_tap_init,
+ .ndo_uninit = ip6gre_tunnel_uninit,
+ .ndo_start_xmit = ip6erspan_tunnel_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ip6_tnl_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_iflink = ip6_tnl_get_iflink,
+};
static void ip6gre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
+ dev->max_mtu = 0;
dev->netdev_ops = &ip6gre_tap_netdev_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
@@ -1372,40 +1845,29 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
ip6gre_netlink_parms(data, &nt->parms);
- if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
- return -EEXIST;
+ if (nt->parms.collect_md) {
+ if (rtnl_dereference(ign->collect_md_tun))
+ return -EEXIST;
+ } else {
+ if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+ return -EEXIST;
+ }
if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
eth_hw_addr_random(dev);
nt->dev = dev;
nt->net = dev_net(dev);
- ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
-
- dev->features |= GRE6_FEATURES;
- dev->hw_features |= GRE6_FEATURES;
-
- if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported, nor
- * can we support 2 levels of outer headers requiring
- * an update.
- */
- if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
- (nt->encap.type == TUNNEL_ENCAP_NONE)) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- }
-
- /* Can use a lockless transmit, unless we generate
- * output sequences
- */
- dev->features |= NETIF_F_LLTX;
- }
err = register_netdevice(dev);
if (err)
goto out;
+ ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+
+ if (tb[IFLA_MTU])
+ ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+
dev_hold(dev);
ip6gre_tunnel_link(ign, nt);
@@ -1492,8 +1954,12 @@ static size_t ip6gre_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_GRE_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_GRE_COLLECT_METADATA */
+ nla_total_size(0) +
/* IFLA_GRE_FWMARK */
nla_total_size(4) +
+ /* IFLA_GRE_ERSPAN_INDEX */
+ nla_total_size(4) +
0;
}
@@ -1515,7 +1981,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
- nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
+ nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) ||
+ nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -1528,6 +1995,24 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
t->encap.flags))
goto nla_put_failure;
+ if (p->collect_md) {
+ if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
+ goto nla_put_failure;
+
+ if (p->erspan_ver == 1) {
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+ goto nla_put_failure;
+ } else if (p->erspan_ver == 2) {
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
+ goto nla_put_failure;
+ }
+
return 0;
nla_put_failure:
@@ -1550,9 +2035,28 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_GRE_FWMARK] = { .type = NLA_U32 },
+ [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
+ [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
};
+static void ip6erspan_tap_setup(struct net_device *dev)
+{
+ ether_setup(dev);
+
+ dev->netdev_ops = &ip6erspan_netdev_ops;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = ip6gre_dev_free;
+
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netif_keep_dst(dev);
+}
+
static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
.kind = "ip6gre",
.maxtype = IFLA_GRE_MAX,
@@ -1582,6 +2086,20 @@ static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
.get_link_net = ip6_tnl_get_link_net,
};
+static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
+ .kind = "ip6erspan",
+ .maxtype = IFLA_GRE_MAX,
+ .policy = ip6gre_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = ip6erspan_tap_setup,
+ .validate = ip6erspan_tap_validate,
+ .newlink = ip6gre_newlink,
+ .changelink = ip6gre_changelink,
+ .get_size = ip6gre_get_size,
+ .fill_info = ip6gre_fill_info,
+ .get_link_net = ip6_tnl_get_link_net,
+};
+
/*
* And now the modules code and kernel interface.
*/
@@ -1610,9 +2128,15 @@ static int __init ip6gre_init(void)
if (err < 0)
goto tap_ops_failed;
+ err = rtnl_link_register(&ip6erspan_tap_ops);
+ if (err < 0)
+ goto erspan_link_failed;
+
out:
return err;
+erspan_link_failed:
+ rtnl_link_unregister(&ip6gre_tap_ops);
tap_ops_failed:
rtnl_link_unregister(&ip6gre_link_ops);
rtnl_link_failed:
@@ -1626,6 +2150,7 @@ static void __exit ip6gre_fini(void)
{
rtnl_link_unregister(&ip6gre_tap_ops);
rtnl_link_unregister(&ip6gre_link_ops);
+ rtnl_link_unregister(&ip6erspan_tap_ops);
inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
unregister_pernet_device(&ip6gre_net_ops);
}
@@ -1637,4 +2162,5 @@ MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
MODULE_ALIAS_RTNL_LINK("ip6gre");
MODULE_ALIAS_RTNL_LINK("ip6gretap");
+MODULE_ALIAS_RTNL_LINK("ip6erspan");
MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5110a418cc4d..997c7f19ad62 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -138,6 +138,14 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
return ret;
}
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
+ /* Policy lookup after SNAT yielded a new policy */
+ if (skb_dst(skb)->xfrm) {
+ IPCB(skb)->flags |= IPSKB_REROUTED;
+ return dst_output(net, sk, skb);
+ }
+#endif
+
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
@@ -166,6 +174,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+ if (!np->autoflowlabel_set)
+ return ip6_default_np_autolabel(net);
+ else
+ return np->autoflowlabel;
+}
+
/*
* xmit an sk_buff (used by TCP, SCTP and DCCP)
* Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +246,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel, fl6));
+ ip6_autoflowlabel(net, np), fl6));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -362,7 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
return dst_output(net, sk, skb);
}
-static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
unsigned int mtu;
struct inet6_dev *idev;
@@ -382,6 +398,7 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu;
}
+EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
@@ -1198,16 +1215,18 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
- rt->dst.dev->mtu : dst_mtu(&rt->dst);
+ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
else
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
- rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
if (np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
}
+ if (mtu < IPV6_MIN_MTU)
+ return -EINVAL;
cork->base.fragsize = mtu;
- if (dst_allfrag(rt->dst.path))
+ if (dst_allfrag(xfrm_dst_path(&rt->dst)))
cork->base.flags |= IPCORK_ALLFRAG;
cork->base.length = 0;
@@ -1626,7 +1645,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel, fl6));
+ ip6_autoflowlabel(net, np), fl6));
hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
@@ -1725,11 +1744,13 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
cork.base.flags = 0;
cork.base.addr = 0;
cork.base.opt = NULL;
+ cork.base.dst = NULL;
v6_cork.opt = NULL;
err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
- if (err)
+ if (err) {
+ ip6_cork_release(&cork, &v6_cork);
return ERR_PTR(err);
-
+ }
if (ipc6->dontfrag < 0)
ipc6->dontfrag = inet6_sk(sk)->dontfrag;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3d3092adf1d2..4b15fe928278 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -642,8 +642,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2)))
goto out;
- skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2,
- rel_info);
+ skb_dst_update_pmtu(skb2, rel_info);
}
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -861,7 +860,7 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
struct metadata_dst *tun_dst,
bool log_ecn_err)
{
- return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
+ return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate,
log_ecn_err);
}
EXPORT_SYMBOL(ip6_tnl_rcv);
@@ -904,7 +903,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
if (t->parms.collect_md) {
tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
if (!tun_dst)
- return 0;
+ goto drop;
}
ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
log_ecn_error);
@@ -979,6 +978,9 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
int ret = 0;
struct net *net = t->net;
+ if (t->parms.collect_md)
+ return 1;
+
if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
(ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
@@ -1074,10 +1076,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
}
- } else if (!(t->parms.flags &
- (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
- /* enable the cache only only if the routing decision does
- * not depend on the current inner header value
+ } else if (t->parms.proto != 0 && !(t->parms.flags &
+ (IP6_TNL_F_USE_ORIG_TCLASS |
+ IP6_TNL_F_USE_ORIG_FWMARK))) {
+ /* enable the cache only if neither the outer protocol nor the
+ * routing decision depends on the current inner header value
*/
use_cache = true;
}
@@ -1123,10 +1126,14 @@ route_lookup:
max_headroom += 8;
mtu -= 8;
}
- if (mtu < IPV6_MIN_MTU)
- mtu = IPV6_MIN_MTU;
- if (skb_dst(skb) && !t->parms.collect_md)
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ } else if (mtu < 576) {
+ mtu = 576;
+ }
+
+ skb_dst_update_pmtu(skb, mtu);
if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
err = -EMSGSIZE;
@@ -1671,11 +1678,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
struct ip6_tnl *tnl = netdev_priv(dev);
- if (tnl->parms.proto == IPPROTO_IPIP) {
- if (new_mtu < ETH_MIN_MTU)
+ if (tnl->parms.proto == IPPROTO_IPV6) {
+ if (new_mtu < IPV6_MIN_MTU)
return -EINVAL;
} else {
- if (new_mtu < IPV6_MIN_MTU)
+ if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
}
if (new_mtu > 0xFFF8 - dev->hard_header_len)
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index dbb74f3c57a7..fa3ae1cb50d3 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -483,7 +483,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
mtu = dst_mtu(dst);
if (!skb->ignore_df && skb->len > mtu) {
- skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
+ skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IPV6)) {
if (mtu < IPV6_MIN_MTU)
@@ -626,6 +626,7 @@ static void vti6_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
struct __ip6_tnl_parm *p = &t->parms;
+ struct net_device *tdev = NULL;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
@@ -638,6 +639,25 @@ static void vti6_link_config(struct ip6_tnl *t)
dev->flags |= IFF_POINTOPOINT;
else
dev->flags &= ~IFF_POINTOPOINT;
+
+ if (p->flags & IP6_TNL_F_CAP_XMIT) {
+ int strict = (ipv6_addr_type(&p->raddr) &
+ (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
+ struct rt6_info *rt = rt6_lookup(t->net,
+ &p->raddr, &p->laddr,
+ p->link, strict);
+
+ if (rt)
+ tdev = rt->dst.dev;
+ ip6_rt_put(rt);
+ }
+
+ if (!tdev && p->link)
+ tdev = __dev_get_by_index(t->net, p->link);
+
+ if (tdev)
+ dev->mtu = max_t(int, tdev->mtu - dev->hard_header_len,
+ IPV6_MIN_MTU);
}
/**
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index a2e1a864eb46..9f6cace9c817 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -477,7 +477,6 @@ static int ip6mr_vif_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip6mr_vif_fops = {
- .owner = THIS_MODULE,
.open = ip6mr_vif_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -495,6 +494,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
return ERR_PTR(-ENOENT);
it->mrt = mrt;
+ it->cache = NULL;
return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
: SEQ_START_TOKEN;
}
@@ -609,7 +609,6 @@ static int ipmr_mfc_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip6mr_mfc_fops = {
- .owner = THIS_MODULE,
.open = ipmr_mfc_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1425,10 +1424,13 @@ int __init ip6_mr_init(void)
goto add_proto_fail;
}
#endif
- rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
- ip6mr_rtm_dumproute, 0);
- return 0;
+ err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
+ NULL, ip6mr_rtm_dumproute, 0);
+ if (err == 0)
+ return 0;
+
#ifdef CONFIG_IPV6_PIMSM_V2
+ inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b9404feabd78..d78d41fc4b1a 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -886,6 +886,7 @@ pref_skip_coa:
break;
case IPV6_AUTOFLOWLABEL:
np->autoflowlabel = valbool;
+ np->autoflowlabel_set = 1;
retv = 0;
break;
case IPV6_RECVFRAGSIZE:
@@ -922,12 +923,8 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
- optname != IPV6_XFRM_POLICY) {
- lock_sock(sk);
- err = nf_setsockopt(sk, PF_INET6, optname, optval,
- optlen);
- release_sock(sk);
- }
+ optname != IPV6_XFRM_POLICY)
+ err = nf_setsockopt(sk, PF_INET6, optname, optval, optlen);
#endif
return err;
}
@@ -957,12 +954,9 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
- optname != IPV6_XFRM_POLICY) {
- lock_sock(sk);
- err = compat_nf_setsockopt(sk, PF_INET6, optname,
- optval, optlen);
- release_sock(sk);
- }
+ optname != IPV6_XFRM_POLICY)
+ err = compat_nf_setsockopt(sk, PF_INET6, optname, optval,
+ optlen);
#endif
return err;
}
@@ -1335,7 +1329,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_AUTOFLOWLABEL:
- val = np->autoflowlabel;
+ val = ip6_autoflowlabel(sock_net(sk), np);
break;
case IPV6_RECVFRAGSIZE:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fc6d7d143f2c..6a5d0e39bb87 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1655,8 +1655,6 @@ static void mld_sendpack(struct sk_buff *skb)
if (err)
goto err_out;
- payload_len = skb->len;
-
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, net->ipv6.igmp_sk, skb, NULL, skb->dev,
dst_output);
@@ -1682,16 +1680,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
- int type, struct mld2_grec **ppgr)
+ int type, struct mld2_grec **ppgr, unsigned int mtu)
{
- struct net_device *dev = pmc->idev->dev;
struct mld2_report *pmr;
struct mld2_grec *pgr;
- if (!skb)
- skb = mld_newpack(pmc->idev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = mld_newpack(pmc->idev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = skb_put(skb, sizeof(struct mld2_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -1714,10 +1712,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
struct mld2_grec *pgr = NULL;
struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->mca_flags & MAF_NOREPORT)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV6_MIN_MTU)
+ return skb;
+
isquery = type == MLD2_MODE_IS_INCLUDE ||
type == MLD2_MODE_IS_EXCLUDE;
truncate = type == MLD2_MODE_IS_EXCLUDE ||
@@ -1738,7 +1741,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(idev, dev->mtu);
+ skb = mld_newpack(idev, mtu);
}
}
first = 1;
@@ -1774,12 +1777,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
pgr->grec_nsrcs = htons(scount);
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(idev, dev->mtu);
+ skb = mld_newpack(idev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -1814,7 +1817,7 @@ empty_source:
mld_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
@@ -2753,7 +2756,6 @@ static int igmp6_mc_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations igmp6_mc_seq_fops = {
- .owner = THIS_MODULE,
.open = igmp6_mc_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -2908,7 +2910,6 @@ static int igmp6_mcf_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations igmp6_mcf_seq_fops = {
- .owner = THIS_MODULE,
.open = igmp6_mcf_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b3cea200c85e..f61a5b613b52 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -566,6 +566,11 @@ static void ndisc_send_unsol_na(struct net_device *dev)
read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ /* skip tentative addresses until dad completes */
+ if (ifa->flags & IFA_F_TENTATIVE &&
+ !(ifa->flags & IFA_F_OPTIMISTIC))
+ continue;
+
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
/*router=*/ !!idev->cnf.forwarding,
/*solicited=*/ false, /*override=*/ true,
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 39970e212ad5..d95ceca7ff8f 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -68,32 +68,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
}
EXPORT_SYMBOL(ip6_route_me_harder);
-/*
- * Extra routing may needed on local out, as the QUEUE target never
- * returns control to the table.
- */
-
-struct ip6_rt_info {
- struct in6_addr daddr;
- struct in6_addr saddr;
- u_int32_t mark;
-};
-
-static void nf_ip6_saveroute(const struct sk_buff *skb,
- struct nf_queue_entry *entry)
-{
- struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
- if (entry->state.hook == NF_INET_LOCAL_OUT) {
- const struct ipv6hdr *iph = ipv6_hdr(skb);
-
- rt_info->daddr = iph->daddr;
- rt_info->saddr = iph->saddr;
- rt_info->mark = skb->mark;
- }
-}
-
-static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
+static int nf_ip6_reroute(struct sk_buff *skb,
const struct nf_queue_entry *entry)
{
struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -103,7 +78,7 @@ static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
skb->mark != rt_info->mark)
- return ip6_route_me_harder(net, skb);
+ return ip6_route_me_harder(entry->state.net, skb);
}
return 0;
}
@@ -190,25 +165,19 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
};
static const struct nf_ipv6_ops ipv6ops = {
- .chk_addr = ipv6_chk_addr,
- .route_input = ip6_route_input,
- .fragment = ip6_fragment
-};
-
-static const struct nf_afinfo nf_ip6_afinfo = {
- .family = AF_INET6,
+ .chk_addr = ipv6_chk_addr,
+ .route_input = ip6_route_input,
+ .fragment = ip6_fragment,
.checksum = nf_ip6_checksum,
.checksum_partial = nf_ip6_checksum_partial,
.route = nf_ip6_route,
- .saveroute = nf_ip6_saveroute,
.reroute = nf_ip6_reroute,
- .route_key_size = sizeof(struct ip6_rt_info),
};
int __init ipv6_netfilter_init(void)
{
RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
- return nf_register_afinfo(&nf_ip6_afinfo);
+ return 0;
}
/* This can be called from inet6_init() on errors, so it cannot
@@ -217,5 +186,4 @@ int __init ipv6_netfilter_init(void)
void ipv6_netfilter_fini(void)
{
RCU_INIT_POINTER(nf_ipv6_ops, NULL);
- nf_unregister_afinfo(&nf_ip6_afinfo);
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 6acb2eecd986..4a634b7a2c80 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -71,6 +71,15 @@ config NFT_FIB_IPV6
endif # NF_TABLES_IPV6
endif # NF_TABLES
+config NF_FLOW_TABLE_IPV6
+ tristate "Netfilter flow table IPv6 module"
+ depends on NF_CONNTRACK && NF_TABLES
+ select NF_FLOW_TABLE
+ help
+ This option adds the flow table IPv6 support.
+
+ To compile it as a module, choose M here.
+
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
@@ -232,6 +241,15 @@ config IP6_NF_MATCH_RT
To compile it as a module, choose M here. If unsure, say N.
+config IP6_NF_MATCH_SRH
+ tristate '"srh" Segment Routing header match support'
+ depends on NETFILTER_ADVANCED
+ help
+ srh matching allows you to match packets based on the segment
+ routing header of the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
# The targets
config IP6_NF_TARGET_HL
tristate '"HL" hoplimit target support'
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index c6ee0cdd0ba9..d984057b8395 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
+# flow table support
+obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
+
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
@@ -54,6 +57,7 @@ obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
+obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o
# targets
obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f06e25065a34..af4c917e0836 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -282,12 +282,7 @@ ip6t_do_table(struct sk_buff *skb,
local_bh_disable();
addend = xt_write_recseq_begin();
- private = table->private;
- /*
- * Ensure we load private-> members after we've fetched the base
- * pointer.
- */
- smp_read_barrier_depends();
+ private = READ_ONCE(table->private); /* Address dependency. */
cpu = smp_processor_id();
table_base = private->entries;
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
@@ -458,7 +453,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
- e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -992,9 +986,8 @@ static int get_info(struct net *net, void __user *user,
if (compat)
xt_compat_lock(AF_INET6);
#endif
- t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
- "ip6table_%s", name);
- if (t) {
+ t = xt_request_find_table_lock(net, AF_INET6, name);
+ if (!IS_ERR(t)) {
struct ip6t_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -1024,7 +1017,7 @@ static int get_info(struct net *net, void __user *user,
xt_table_unlock(t);
module_put(t->me);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_unlock(AF_INET6);
@@ -1050,7 +1043,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET6, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
struct xt_table_info *private = t->private;
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
@@ -1061,7 +1054,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
return ret;
}
@@ -1084,10 +1077,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
goto out;
}
- t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
- "ip6table_%s", name);
- if (!t) {
- ret = -ENOENT;
+ t = xt_request_find_table_lock(net, AF_INET6, name);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free_newinfo_counters_untrans;
}
@@ -1200,8 +1192,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
if (IS_ERR(paddc))
return PTR_ERR(paddc);
t = xt_find_table_lock(net, AF_INET6, tmp.name);
- if (!t) {
- ret = -ENOENT;
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free;
}
@@ -1637,7 +1629,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
xt_compat_lock(AF_INET6);
t = xt_find_table_lock(net, AF_INET6, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
ret = compat_table_info(private, &info);
@@ -1651,7 +1643,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
xt_compat_unlock(AF_INET6);
return ret;
@@ -1955,7 +1947,6 @@ static int __init ip6_tables_init(void)
if (ret < 0)
goto err5;
- pr_info("(C) 2000-2006 Netfilter Core Team\n");
return 0;
err5:
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 2b1a15846f9a..92c0047e7e33 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -33,13 +33,19 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
if (range->flags & NF_NAT_RANGE_MAP_IPS)
return -EINVAL;
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_target masquerade_tg6_reg __read_mostly = {
.name = "MASQUERADE",
.family = NFPROTO_IPV6,
.checkentry = masquerade_tg6_checkentry,
+ .destroy = masquerade_tg6_destroy,
.target = masquerade_tg6,
.targetsize = sizeof(struct nf_nat_range),
.table = "nat",
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
new file mode 100644
index 000000000000..9642164107ce
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_srh.c
@@ -0,0 +1,161 @@
+/* Kernel module to match Segment Routing Header (SRH) parameters. */
+
+/* Author:
+ * Ahmed Abdelsalam <amsalam20@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/ipv6.h>
+#include <net/seg6.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6t_srh.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+/* Test a struct->mt_invflags and a boolean for inequality */
+#define NF_SRH_INVF(ptr, flag, boolean) \
+ ((boolean) ^ !!((ptr)->mt_invflags & (flag)))
+
+static bool srh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct ip6t_srh *srhinfo = par->matchinfo;
+ struct ipv6_sr_hdr *srh;
+ struct ipv6_sr_hdr _srh;
+ int hdrlen, srhoff = 0;
+
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ return false;
+ srh = skb_header_pointer(skb, srhoff, sizeof(_srh), &_srh);
+ if (!srh)
+ return false;
+
+ hdrlen = ipv6_optlen(srh);
+ if (skb->len - srhoff < hdrlen)
+ return false;
+
+ if (srh->type != IPV6_SRCRT_TYPE_4)
+ return false;
+
+ if (srh->segments_left > srh->first_segment)
+ return false;
+
+ /* Next Header matching */
+ if (srhinfo->mt_flags & IP6T_SRH_NEXTHDR)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NEXTHDR,
+ !(srh->nexthdr == srhinfo->next_hdr)))
+ return false;
+
+ /* Header Extension Length matching */
+ if (srhinfo->mt_flags & IP6T_SRH_LEN_EQ)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_EQ,
+ !(srh->hdrlen == srhinfo->hdr_len)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_LEN_GT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_GT,
+ !(srh->hdrlen > srhinfo->hdr_len)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_LEN_LT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_LT,
+ !(srh->hdrlen < srhinfo->hdr_len)))
+ return false;
+
+ /* Segments Left matching */
+ if (srhinfo->mt_flags & IP6T_SRH_SEGS_EQ)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_EQ,
+ !(srh->segments_left == srhinfo->segs_left)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_SEGS_GT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_GT,
+ !(srh->segments_left > srhinfo->segs_left)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_SEGS_LT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_LT,
+ !(srh->segments_left < srhinfo->segs_left)))
+ return false;
+
+ /**
+ * Last Entry matching
+ * Last_Entry field was introduced in revision 6 of the SRH draft.
+ * It was called First_Segment in the previous revision
+ */
+ if (srhinfo->mt_flags & IP6T_SRH_LAST_EQ)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_EQ,
+ !(srh->first_segment == srhinfo->last_entry)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_LAST_GT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_GT,
+ !(srh->first_segment > srhinfo->last_entry)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_LAST_LT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_LT,
+ !(srh->first_segment < srhinfo->last_entry)))
+ return false;
+
+ /**
+ * Tag matchig
+ * Tag field was introduced in revision 6 of the SRH draft.
+ */
+ if (srhinfo->mt_flags & IP6T_SRH_TAG)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_TAG,
+ !(srh->tag == srhinfo->tag)))
+ return false;
+ return true;
+}
+
+static int srh_mt6_check(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_srh *srhinfo = par->matchinfo;
+
+ if (srhinfo->mt_flags & ~IP6T_SRH_MASK) {
+ pr_err("unknown srh match flags %X\n", srhinfo->mt_flags);
+ return -EINVAL;
+ }
+
+ if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) {
+ pr_err("unknown srh invflags %X\n", srhinfo->mt_invflags);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match srh_mt6_reg __read_mostly = {
+ .name = "srh",
+ .family = NFPROTO_IPV6,
+ .match = srh_mt6,
+ .matchsize = sizeof(struct ip6t_srh),
+ .checkentry = srh_mt6_check,
+ .me = THIS_MODULE,
+};
+
+static int __init srh_mt6_init(void)
+{
+ return xt_register_match(&srh_mt6_reg);
+}
+
+static void __exit srh_mt6_exit(void)
+{
+ xt_unregister_match(&srh_mt6_reg);
+}
+
+module_init(srh_mt6_init);
+module_exit(srh_mt6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: IPv6 Segment Routing Header match");
+MODULE_AUTHOR("Ahmed Abdelsalam <amsalam20@gmail.com>");
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 2b1a9dcdbcb3..b0524b18c4fb 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -42,14 +42,6 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
u_int8_t hop_limit;
u_int32_t flowlabel, mark;
int err;
-#if 0
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)) {
- net_warn_ratelimited("ip6t_hook: happy cracking\n");
- return NF_ACCEPT;
- }
-#endif
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 991512576c8c..47306e45a80a 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -74,6 +74,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
{
.hook = ip6table_nat_in,
.pf = NFPROTO_IPV6,
+ .nat_hook = true,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_NAT_DST,
},
@@ -81,6 +82,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
{
.hook = ip6table_nat_out,
.pf = NFPROTO_IPV6,
+ .nat_hook = true,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_NAT_SRC,
},
@@ -88,12 +90,14 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
{
.hook = ip6table_nat_local_fn,
.pf = NFPROTO_IPV6,
+ .nat_hook = true,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST,
},
/* After packet filtering, change source */
{
.hook = ip6table_nat_fn,
+ .nat_hook = true,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index d4bc56443dc1..710fa0806c37 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/slab.h>
@@ -11,6 +12,10 @@
static int __net_init ip6table_raw_table_init(struct net *net);
+static bool raw_before_defrag __read_mostly;
+MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
+module_param(raw_before_defrag, bool, 0000);
+
static const struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
@@ -20,6 +25,15 @@ static const struct xt_table packet_raw = {
.table_init = ip6table_raw_table_init,
};
+static const struct xt_table packet_raw_before_defrag = {
+ .name = "raw",
+ .valid_hooks = RAW_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV6,
+ .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
+ .table_init = ip6table_raw_table_init,
+};
+
/* The work comes in here from netfilter.c. */
static unsigned int
ip6table_raw_hook(void *priv, struct sk_buff *skb,
@@ -33,15 +47,19 @@ static struct nf_hook_ops *rawtable_ops __read_mostly;
static int __net_init ip6table_raw_table_init(struct net *net)
{
struct ip6t_replace *repl;
+ const struct xt_table *table = &packet_raw;
int ret;
+ if (raw_before_defrag)
+ table = &packet_raw_before_defrag;
+
if (net->ipv6.ip6table_raw)
return 0;
- repl = ip6t_alloc_initial_table(&packet_raw);
+ repl = ip6t_alloc_initial_table(table);
if (repl == NULL)
return -ENOMEM;
- ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops,
+ ret = ip6t_register_table(net, table, repl, rawtable_ops,
&net->ipv6.ip6table_raw);
kfree(repl);
return ret;
@@ -62,9 +80,16 @@ static struct pernet_operations ip6table_raw_net_ops = {
static int __init ip6table_raw_init(void)
{
int ret;
+ const struct xt_table *table = &packet_raw;
+
+ if (raw_before_defrag) {
+ table = &packet_raw_before_defrag;
+
+ pr_info("Enabling raw table before defrag\n");
+ }
/* Register hooks */
- rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook);
+ rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
if (IS_ERR(rawtable_ops))
return PTR_ERR(rawtable_ops);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 3b80a38f62b8..663827ee3cf8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -176,11 +176,6 @@ static unsigned int ipv6_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr)) {
- net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
- return NF_ACCEPT;
- }
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
@@ -226,20 +221,27 @@ static const struct nf_hook_ops ipv6_conntrack_ops[] = {
static int
ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
- const struct inet_sock *inet = inet_sk(sk);
+ struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
const struct ipv6_pinfo *inet6 = inet6_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
const struct nf_conntrack_tuple_hash *h;
struct sockaddr_in6 sin6;
- struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
struct nf_conn *ct;
+ __be32 flow_label;
+ int bound_dev_if;
+ lock_sock(sk);
tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
tuple.src.u.tcp.port = inet->inet_sport;
tuple.dst.u3.in6 = sk->sk_v6_daddr;
tuple.dst.u.tcp.port = inet->inet_dport;
tuple.dst.protonum = sk->sk_protocol;
+ bound_dev_if = sk->sk_bound_dev_if;
+ flow_label = inet6->flow_label;
+ release_sock(sk);
- if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP)
+ if (tuple.dst.protonum != IPPROTO_TCP &&
+ tuple.dst.protonum != IPPROTO_SCTP)
return -ENOPROTOOPT;
if (*len < 0 || (unsigned int) *len < sizeof(sin6))
@@ -257,14 +259,13 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
sin6.sin6_family = AF_INET6;
sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
- sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK;
+ sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
memcpy(&sin6.sin6_addr,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
sizeof(sin6.sin6_addr));
nf_ct_put(ct);
- sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr,
- sk->sk_bound_dev_if);
+ sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
}
@@ -368,7 +369,7 @@ static struct nf_sockopt_ops so_getorigdst6 = {
.owner = THIS_MODULE,
};
-static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
+static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = {
&nf_conntrack_l4proto_tcp6,
&nf_conntrack_l4proto_udp6,
&nf_conntrack_l4proto_icmpv6,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 3ac0d826afc4..2548e2c8aedd 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -27,7 +27,7 @@
#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
#include <net/netfilter/nf_log.h>
-static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
+static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
{
@@ -352,7 +352,7 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.icmpv6.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_ICMPV6,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 977d8900cfd1..ce53dcfda88a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -231,7 +231,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
if ((unsigned int)end > IPV6_MAXPLEN) {
pr_debug("offset is too large.\n");
- return -1;
+ return -EINVAL;
}
ecn = ip6_frag_ecn(ipv6_hdr(skb));
@@ -264,7 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
- return -1;
+ return -EPROTO;
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
@@ -358,7 +358,7 @@ found:
discard_fq:
inet_frag_kill(&fq->q, &nf_frags);
err:
- return -1;
+ return -EINVAL;
}
/*
@@ -567,6 +567,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
+ u16 savethdr = skb->transport_header;
struct net_device *dev = skb->dev;
int fhoff, nhoff, ret;
struct frag_hdr *fhdr;
@@ -600,8 +601,12 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
spin_lock_bh(&fq->q.lock);
- if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) {
- ret = -EINVAL;
+ ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
+ if (ret < 0) {
+ if (ret == -EPROTO) {
+ skb->transport_header = savethdr;
+ ret = 0;
+ }
goto out_unlock;
}
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index b326da59257f..c87b48359e8f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -63,6 +63,9 @@ static unsigned int ipv6_defrag(void *priv,
/* Previously seen (loopback)? */
if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb)))
return NF_ACCEPT;
+
+ if (skb->_nfct == IP_CT_UNTRACKED)
+ return NF_ACCEPT;
#endif
err = nf_ct_frag6_gather(state->net, skb,
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
new file mode 100644
index 000000000000..fff21602875a
--- /dev/null
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -0,0 +1,277 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff, struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ switch (ip6h->nexthdr) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir)
+{
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ unsigned int thoff = sizeof(*ip6h);
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
+ (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ return -1;
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
+ (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ return -1;
+
+ return 0;
+}
+
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ struct ipv6hdr *ip6h;
+ unsigned int thoff;
+
+ if (!pskb_may_pull(skb, sizeof(*ip6h)))
+ return -1;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->nexthdr != IPPROTO_TCP &&
+ ip6h->nexthdr != IPPROTO_UDP)
+ return -1;
+
+ thoff = sizeof(*ip6h);
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return -1;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v6 = ip6h->saddr;
+ tuple->dst_v6 = ip6h->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET6;
+ tuple->l4proto = ip6h->nexthdr;
+ tuple->iifidx = dev->ifindex;
+
+ return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ return false;
+
+ return true;
+}
+
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
+{
+ u32 mtu;
+
+ mtu = ip6_dst_mtu_forward(&rt->dst);
+ if (__nf_flow_exceeds_mtu(skb, mtu))
+ return true;
+
+ return false;
+}
+
+unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct in6_addr *nexthop;
+ struct ipv6hdr *ip6h;
+ struct rt6_info *rt;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+ if (!outdev)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+ rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+ if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+ return NF_ACCEPT;
+
+ if (skb_try_make_writable(skb, sizeof(*ip6h)))
+ return NF_DROP;
+
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+ nf_flow_nat_ipv6(flow, skb, dir) < 0)
+ return NF_DROP;
+
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ ip6h = ipv6_hdr(skb);
+ ip6h->hop_limit--;
+
+ skb->dev = outdev;
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+
+ return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
+
+static struct nf_flowtable_type flowtable_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_ipv6_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_ipv6_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_ipv6);
+
+ return 0;
+}
+
+static void __exit nf_flow_ipv6_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_ipv6);
+}
+
+module_init(nf_flow_ipv6_module_init);
+module_exit(nf_flow_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 1d2fb9267d6f..bed57ee65f7b 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -369,10 +369,6 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
#endif
unsigned int ret;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
@@ -408,10 +404,6 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
unsigned int ret;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index d6e4ba5de916..17e03589331c 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -22,68 +22,12 @@ static unsigned int nft_do_chain_ipv6(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_ipv6_output(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
- if (net_ratelimit())
- pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
- "packet\n");
- return NF_ACCEPT;
- }
-
- return nft_do_chain_ipv6(priv, skb, state);
-}
-
-struct nft_af_info nft_af_ipv6 __read_mostly = {
- .family = NFPROTO_IPV6,
- .nhooks = NF_INET_NUMHOOKS,
- .owner = THIS_MODULE,
- .nops = 1,
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
- [NF_INET_LOCAL_OUT] = nft_ipv6_output,
- [NF_INET_FORWARD] = nft_do_chain_ipv6,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
- },
-};
-EXPORT_SYMBOL_GPL(nft_af_ipv6);
-
-static int nf_tables_ipv6_init_net(struct net *net)
-{
- net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.ipv6 == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
-
- if (nft_register_afinfo(net, net->nft.ipv6) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.ipv6);
- return -ENOMEM;
-}
-
-static void nf_tables_ipv6_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.ipv6);
- kfree(net->nft.ipv6);
-}
-
-static struct pernet_operations nf_tables_ipv6_net_ops = {
- .init = nf_tables_ipv6_init_net,
- .exit = nf_tables_ipv6_exit_net,
-};
-
static const struct nf_chain_type filter_ipv6 = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -94,26 +38,22 @@ static const struct nf_chain_type filter_ipv6 = {
(1 << NF_INET_FORWARD) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
+ [NF_INET_FORWARD] = nft_do_chain_ipv6,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+ },
};
static int __init nf_tables_ipv6_init(void)
{
- int ret;
-
- ret = nft_register_chain_type(&filter_ipv6);
- if (ret < 0)
- return ret;
-
- ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
- if (ret < 0)
- nft_unregister_chain_type(&filter_ipv6);
-
- return ret;
+ return nft_register_chain_type(&filter_ipv6);
}
static void __exit nf_tables_ipv6_exit(void)
{
- unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
nft_unregister_chain_type(&filter_ipv6);
}
@@ -122,4 +62,4 @@ module_exit(nf_tables_ipv6_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(AF_INET6);
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter");
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 443cd306c0b0..73fe2bd13fcf 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -31,7 +31,8 @@ static unsigned int nft_nat_do_chain(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index f2727475895e..11d3c3b9aa18 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -33,7 +33,8 @@ static unsigned int nf_route_table_hook(void *priv,
u32 mark, flowlabel;
int err;
- nft_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
/* save source/dest address, mark, hoplimit, flowlabel, priority */
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 54b5899543ef..cc5174c7254c 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -60,7 +60,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
{
const struct net_device *dev = NULL;
const struct nf_ipv6_ops *v6ops;
- const struct nf_afinfo *afinfo;
int route_err, addrtype;
struct rt6_info *rt;
struct flowi6 fl6 = {
@@ -69,8 +68,8 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
};
u32 ret = 0;
- afinfo = nf_get_afinfo(NFPROTO_IPV6);
- if (!afinfo)
+ v6ops = nf_get_ipv6_ops();
+ if (!v6ops)
return RTN_UNREACHABLE;
if (priv->flags & NFTA_FIB_F_IIF)
@@ -80,12 +79,11 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
- v6ops = nf_get_ipv6_ops();
- if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+ if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
ret = RTN_LOCAL;
- route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt,
- flowi6_to_flowi(&fl6), false);
+ route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt,
+ flowi6_to_flowi(&fl6), false);
if (route_err)
goto err;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index e88bcb8ff0fd..b67814242f78 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -58,7 +58,6 @@ static int sockstat6_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations sockstat6_seq_fops = {
- .owner = THIS_MODULE,
.open = sockstat6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -248,7 +247,6 @@ static int snmp6_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations snmp6_seq_fops = {
- .owner = THIS_MODULE,
.open = snmp6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -274,7 +272,6 @@ static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations snmp6_dev_seq_fops = {
- .owner = THIS_MODULE,
.open = snmp6_dev_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 761a473a07c5..ddda7eb3c623 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1308,7 +1308,6 @@ static int raw6_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations raw6_seq_fops = {
- .owner = THIS_MODULE,
.open = raw6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7a8d1500d374..fb2d251c0500 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -186,7 +186,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
- return dst_metrics_write_ptr(rt->dst.from);
+ return dst_metrics_write_ptr(&rt->from->dst);
}
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
@@ -391,7 +391,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct rt6_exception_bucket *bucket;
- struct dst_entry *from = dst->from;
+ struct rt6_info *from = rt->from;
struct inet6_dev *idev;
dst_destroy_metrics_generic(dst);
@@ -409,8 +409,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
kfree(bucket);
}
- dst->from = NULL;
- dst_release(from);
+ rt->from = NULL;
+ dst_release(&from->dst);
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -443,9 +443,9 @@ static bool rt6_check_expired(const struct rt6_info *rt)
if (rt->rt6i_flags & RTF_EXPIRES) {
if (time_after(jiffies, rt->dst.expires))
return true;
- } else if (rt->dst.from) {
+ } else if (rt->from) {
return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
- rt6_check_expired((struct rt6_info *)rt->dst.from);
+ rt6_check_expired(rt->from);
}
return false;
}
@@ -455,7 +455,6 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
int strict)
{
struct rt6_info *sibling, *next_sibling;
- int route_choosen;
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
@@ -463,26 +462,19 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
if (!fl6->mp_hash)
fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
- route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
- /* Don't change the route, if route_choosen == 0
- * (siblings does not include ourself)
- */
- if (route_choosen)
- list_for_each_entry_safe(sibling, next_sibling,
- &match->rt6i_siblings, rt6i_siblings) {
- route_choosen--;
- if (route_choosen == 0) {
- struct inet6_dev *idev = sibling->rt6i_idev;
-
- if (!netif_carrier_ok(sibling->dst.dev) &&
- idev->cnf.ignore_routes_with_linkdown)
- break;
- if (rt6_score_route(sibling, oif, strict) < 0)
- break;
- match = sibling;
- break;
- }
- }
+ if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
+ return match;
+
+ list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
+ rt6i_siblings) {
+ if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
+ continue;
+ if (rt6_score_route(sibling, oif, strict) < 0)
+ break;
+ match = sibling;
+ break;
+ }
+
return match;
}
@@ -499,12 +491,15 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
struct rt6_info *local = NULL;
struct rt6_info *sprt;
- if (!oif && ipv6_addr_any(saddr))
- goto out;
+ if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
+ return rt;
- for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
+ for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
struct net_device *dev = sprt->dst.dev;
+ if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
+ continue;
+
if (oif) {
if (dev->ifindex == oif)
return sprt;
@@ -533,8 +528,8 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
if (flags & RT6_LOOKUP_F_IFACE)
return net->ipv6.ip6_null_entry;
}
-out:
- return rt;
+
+ return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
}
#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -679,10 +674,12 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
int m;
bool match_do_rr = false;
struct inet6_dev *idev = rt->rt6i_idev;
- struct net_device *dev = rt->dst.dev;
- if (dev && !netif_carrier_ok(dev) &&
- idev->cnf.ignore_routes_with_linkdown &&
+ if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+ goto out;
+
+ if (idev->cnf.ignore_routes_with_linkdown &&
+ rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
!(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
goto out;
@@ -721,7 +718,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = NULL;
cont = NULL;
- for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
+ for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -731,7 +728,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
}
for (rt = leaf; rt && rt != rr_head;
- rt = rcu_dereference(rt->dst.rt6_next)) {
+ rt = rcu_dereference(rt->rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -743,7 +740,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
if (match || !cont)
return match;
- for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
+ for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
@@ -781,7 +778,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
&do_rr);
if (do_rr) {
- struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
+ struct rt6_info *next = rcu_dereference(rt0->rt6_next);
/* no entries matched; do round-robin */
if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@ -1054,7 +1051,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
*/
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
- ort = (struct rt6_info *)ort->dst.from;
+ ort = ort->from;
rcu_read_lock();
dev = ip6_rt_get_dev_rcu(ort);
@@ -1274,7 +1271,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
/* ort can't be a cache or pcpu route */
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
- ort = (struct rt6_info *)ort->dst.from;
+ ort = ort->from;
WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
spin_lock_bh(&rt6_exception_lock);
@@ -1346,7 +1343,9 @@ out:
/* Update fn->fn_sernum to invalidate all cached dst */
if (!err) {
+ spin_lock_bh(&ort->rt6i_table->tb6_lock);
fib6_update_sernum(ort);
+ spin_unlock_bh(&ort->rt6i_table->tb6_lock);
fib6_force_start_gc(net);
}
@@ -1415,8 +1414,8 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
/* Remove the passed in cached rt from the hash table that contains it */
int rt6_remove_exception_rt(struct rt6_info *rt)
{
- struct rt6_info *from = (struct rt6_info *)rt->dst.from;
struct rt6_exception_bucket *bucket;
+ struct rt6_info *from = rt->from;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
int err;
@@ -1460,8 +1459,8 @@ int rt6_remove_exception_rt(struct rt6_info *rt)
*/
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
- struct rt6_info *from = (struct rt6_info *)rt->dst.from;
struct rt6_exception_bucket *bucket;
+ struct rt6_info *from = rt->from;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
@@ -1586,12 +1585,19 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
* EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
* expired, independently from their aging, as per RFC 8201 section 4
*/
- if (!(rt->rt6i_flags & RTF_EXPIRES) &&
- time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
- RT6_TRACE("aging clone %p\n", rt);
+ if (!(rt->rt6i_flags & RTF_EXPIRES)) {
+ if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
+ RT6_TRACE("aging clone %p\n", rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ }
+ } else if (time_after(jiffies, rt->dst.expires)) {
+ RT6_TRACE("purging expired route %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
- } else if (rt->rt6i_flags & RTF_GATEWAY) {
+ }
+
+ if (rt->rt6i_flags & RTF_GATEWAY) {
struct neighbour *neigh;
__u8 neigh_flags = 0;
@@ -1606,11 +1612,8 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
rt6_remove_exception(bucket, rt6_ex);
return;
}
- } else if (__rt6_check_expired(rt)) {
- RT6_TRACE("purging expired route %p\n", rt);
- rt6_remove_exception(bucket, rt6_ex);
- return;
}
+
gc_args->more++;
}
@@ -1824,10 +1827,10 @@ u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
if (skb) {
ip6_multipath_l3_keys(skb, &hash_keys);
- return flow_hash_from_keys(&hash_keys);
+ return flow_hash_from_keys(&hash_keys) >> 1;
}
- return get_hash_from_flowi6(fl6);
+ return get_hash_from_flowi6(fl6) >> 1;
}
void ip6_route_input(struct sk_buff *skb)
@@ -1929,9 +1932,9 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
- if (rt->dst.from &&
- dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
- dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
+ if (rt->from &&
+ dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
+ dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
}
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
@@ -1951,7 +1954,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
if (!__rt6_check_expired(rt) &&
rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
- rt6_check((struct rt6_info *)(rt->dst.from), cookie))
+ rt6_check(rt->from, cookie))
return &rt->dst;
else
return NULL;
@@ -1971,7 +1974,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
rt6_dst_from_metrics_check(rt);
if (rt->rt6i_flags & RTF_PCPU ||
- (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
+ (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
return rt6_dst_from_check(rt, cookie);
else
return rt6_check(rt, cookie);
@@ -2154,6 +2157,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
for_each_fib6_node_rt_rcu(fn) {
+ if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+ continue;
if (rt6_check_expired(rt))
continue;
if (rt->dst.error)
@@ -2336,6 +2341,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
}
rt->dst.flags |= DST_HOST;
+ rt->dst.input = ip6_input;
rt->dst.output = ip6_output;
rt->rt6i_gateway = fl6->daddr;
rt->rt6i_dst.addr = fl6->daddr;
@@ -2343,7 +2349,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->rt6i_idev = idev;
dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
- /* Add this dst into uncached_list so that rt6_ifdown() can
+ /* Add this dst into uncached_list so that rt6_disable_ip() can
* do proper release of the net_device
*/
rt6_uncached_list_add(rt);
@@ -2438,7 +2444,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
struct fib6_config *cfg,
- const struct in6_addr *gw_addr)
+ const struct in6_addr *gw_addr,
+ u32 tbid, int flags)
{
struct flowi6 fl6 = {
.flowi6_oif = cfg->fc_ifindex,
@@ -2447,15 +2454,15 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
};
struct fib6_table *table;
struct rt6_info *rt;
- int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
- table = fib6_get_table(net, cfg->fc_table);
+ table = fib6_get_table(net, tbid);
if (!table)
return NULL;
if (!ipv6_addr_any(&cfg->fc_prefsrc))
flags |= RT6_LOOKUP_F_HAS_SADDR;
+ flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
/* if table lookup failed, fall back to full lookup */
@@ -2467,6 +2474,82 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
return rt;
}
+static int ip6_route_check_nh_onlink(struct net *net,
+ struct fib6_config *cfg,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
+ struct rt6_info *grt;
+ int err;
+
+ err = 0;
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
+ if (grt) {
+ if (grt->rt6i_flags & flags || dev != grt->dst.dev) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+ err = -EINVAL;
+ }
+
+ ip6_rt_put(grt);
+ }
+
+ return err;
+}
+
+static int ip6_route_check_nh(struct net *net,
+ struct fib6_config *cfg,
+ struct net_device **_dev,
+ struct inet6_dev **idev)
+{
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ struct net_device *dev = _dev ? *_dev : NULL;
+ struct rt6_info *grt = NULL;
+ int err = -EHOSTUNREACH;
+
+ if (cfg->fc_table) {
+ int flags = RT6_LOOKUP_F_IFACE;
+
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr,
+ cfg->fc_table, flags);
+ if (grt) {
+ if (grt->rt6i_flags & RTF_GATEWAY ||
+ (dev && dev != grt->dst.dev)) {
+ ip6_rt_put(grt);
+ grt = NULL;
+ }
+ }
+ }
+
+ if (!grt)
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+
+ if (!grt)
+ goto out;
+
+ if (dev) {
+ if (dev != grt->dst.dev) {
+ ip6_rt_put(grt);
+ goto out;
+ }
+ } else {
+ *_dev = dev = grt->dst.dev;
+ *idev = grt->rt6i_idev;
+ dev_hold(dev);
+ in6_dev_hold(grt->rt6i_idev);
+ }
+
+ if (!(grt->rt6i_flags & RTF_GATEWAY))
+ err = 0;
+
+ ip6_rt_put(grt);
+
+out:
+ return err;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -2518,6 +2601,21 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (cfg->fc_metric == 0)
cfg->fc_metric = IP6_RT_PRIO_USER;
+ if (cfg->fc_flags & RTNH_F_ONLINK) {
+ if (!dev) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop device required for onlink");
+ err = -ENODEV;
+ goto out;
+ }
+
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+ }
+
err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
!(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
@@ -2592,6 +2690,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
#endif
rt->rt6i_metric = cfg->fc_metric;
+ rt->rt6i_nh_weight = 1;
/* We cannot add true routes via loopback here,
they would result in kernel looping; promote them to reject routes
@@ -2661,8 +2760,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->rt6i_gateway = *gw_addr;
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- struct rt6_info *grt = NULL;
-
/* IPv6 strictly inhibits using not link-local
addresses as nexthop address.
Otherwise, router will not able to send redirects.
@@ -2679,40 +2776,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
- if (cfg->fc_table) {
- grt = ip6_nh_lookup_table(net, cfg, gw_addr);
-
- if (grt) {
- if (grt->rt6i_flags & RTF_GATEWAY ||
- (dev && dev != grt->dst.dev)) {
- ip6_rt_put(grt);
- grt = NULL;
- }
- }
- }
-
- if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL,
- cfg->fc_ifindex, 1);
-
- err = -EHOSTUNREACH;
- if (!grt)
- goto out;
- if (dev) {
- if (dev != grt->dst.dev) {
- ip6_rt_put(grt);
- goto out;
- }
+ if (cfg->fc_flags & RTNH_F_ONLINK) {
+ err = ip6_route_check_nh_onlink(net, cfg, dev,
+ extack);
} else {
- dev = grt->dst.dev;
- idev = grt->rt6i_idev;
- dev_hold(dev);
- in6_dev_hold(grt->rt6i_idev);
+ err = ip6_route_check_nh(net, cfg, &dev, &idev);
}
- if (!(grt->rt6i_flags & RTF_GATEWAY))
- err = 0;
- ip6_rt_put(grt);
-
if (err)
goto out;
}
@@ -2731,6 +2800,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!dev)
goto out;
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
NL_SET_ERR_MSG(extack, "Invalid source address");
@@ -2745,6 +2820,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->rt6i_flags = cfg->fc_flags;
install_route:
+ if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
+ !netif_carrier_ok(dev))
+ rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+ rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
rt->dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
@@ -3055,11 +3134,11 @@ out:
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
- BUG_ON(from->dst.from);
+ BUG_ON(from->from);
rt->rt6i_flags &= ~RTF_EXPIRES;
dst_hold(&from->dst);
- rt->dst.from = &from->dst;
+ rt->from = from;
dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
@@ -3458,37 +3537,249 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
fib6_clean_all(net, fib6_clean_tohost, gateway);
}
-struct arg_dev_net {
- struct net_device *dev;
- struct net *net;
+struct arg_netdev_event {
+ const struct net_device *dev;
+ union {
+ unsigned int nh_flags;
+ unsigned long event;
+ };
};
+static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
+{
+ struct rt6_info *iter;
+ struct fib6_node *fn;
+
+ fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ iter = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ while (iter) {
+ if (iter->rt6i_metric == rt->rt6i_metric &&
+ rt6_qualify_for_ecmp(iter))
+ return iter;
+ iter = rcu_dereference_protected(iter->rt6_next,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ }
+
+ return NULL;
+}
+
+static bool rt6_is_dead(const struct rt6_info *rt)
+{
+ if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
+ (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
+ rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
+ return true;
+
+ return false;
+}
+
+static int rt6_multipath_total_weight(const struct rt6_info *rt)
+{
+ struct rt6_info *iter;
+ int total = 0;
+
+ if (!rt6_is_dead(rt))
+ total += rt->rt6i_nh_weight;
+
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
+ if (!rt6_is_dead(iter))
+ total += iter->rt6i_nh_weight;
+ }
+
+ return total;
+}
+
+static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
+{
+ int upper_bound = -1;
+
+ if (!rt6_is_dead(rt)) {
+ *weight += rt->rt6i_nh_weight;
+ upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
+ total) - 1;
+ }
+ atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
+}
+
+static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
+{
+ struct rt6_info *iter;
+ int weight = 0;
+
+ rt6_upper_bound_set(rt, &weight, total);
+
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ rt6_upper_bound_set(iter, &weight, total);
+}
+
+void rt6_multipath_rebalance(struct rt6_info *rt)
+{
+ struct rt6_info *first;
+ int total;
+
+ /* In case the entire multipath route was marked for flushing,
+ * then there is no need to rebalance upon the removal of every
+ * sibling route.
+ */
+ if (!rt->rt6i_nsiblings || rt->should_flush)
+ return;
+
+ /* During lookup routes are evaluated in order, so we need to
+ * make sure upper bounds are assigned from the first sibling
+ * onwards.
+ */
+ first = rt6_multipath_first_sibling(rt);
+ if (WARN_ON_ONCE(!first))
+ return;
+
+ total = rt6_multipath_total_weight(first);
+ rt6_multipath_upper_bound_set(first, total);
+}
+
+static int fib6_ifup(struct rt6_info *rt, void *p_arg)
+{
+ const struct arg_netdev_event *arg = p_arg;
+ const struct net *net = dev_net(arg->dev);
+
+ if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
+ rt->rt6i_nh_flags &= ~arg->nh_flags;
+ fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
+ rt6_multipath_rebalance(rt);
+ }
+
+ return 0;
+}
+
+void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
+{
+ struct arg_netdev_event arg = {
+ .dev = dev,
+ {
+ .nh_flags = nh_flags,
+ },
+ };
+
+ if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
+ arg.nh_flags |= RTNH_F_LINKDOWN;
+
+ fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
+}
+
+static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
+ const struct net_device *dev)
+{
+ struct rt6_info *iter;
+
+ if (rt->dst.dev == dev)
+ return true;
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ if (iter->dst.dev == dev)
+ return true;
+
+ return false;
+}
+
+static void rt6_multipath_flush(struct rt6_info *rt)
+{
+ struct rt6_info *iter;
+
+ rt->should_flush = 1;
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ iter->should_flush = 1;
+}
+
+static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
+ const struct net_device *down_dev)
+{
+ struct rt6_info *iter;
+ unsigned int dead = 0;
+
+ if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
+ dead++;
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ if (iter->dst.dev == down_dev ||
+ iter->rt6i_nh_flags & RTNH_F_DEAD)
+ dead++;
+
+ return dead;
+}
+
+static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
+ const struct net_device *dev,
+ unsigned int nh_flags)
+{
+ struct rt6_info *iter;
+
+ if (rt->dst.dev == dev)
+ rt->rt6i_nh_flags |= nh_flags;
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ if (iter->dst.dev == dev)
+ iter->rt6i_nh_flags |= nh_flags;
+}
+
/* called with write lock held for table with rt */
-static int fib6_ifdown(struct rt6_info *rt, void *arg)
+static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
{
- const struct arg_dev_net *adn = arg;
- const struct net_device *dev = adn->dev;
+ const struct arg_netdev_event *arg = p_arg;
+ const struct net_device *dev = arg->dev;
+ const struct net *net = dev_net(dev);
- if ((rt->dst.dev == dev || !dev) &&
- rt != adn->net->ipv6.ip6_null_entry &&
- (rt->rt6i_nsiblings == 0 ||
- (dev && netdev_unregistering(dev)) ||
- !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
- return -1;
+ if (rt == net->ipv6.ip6_null_entry)
+ return 0;
+
+ switch (arg->event) {
+ case NETDEV_UNREGISTER:
+ return rt->dst.dev == dev ? -1 : 0;
+ case NETDEV_DOWN:
+ if (rt->should_flush)
+ return -1;
+ if (!rt->rt6i_nsiblings)
+ return rt->dst.dev == dev ? -1 : 0;
+ if (rt6_multipath_uses_dev(rt, dev)) {
+ unsigned int count;
+
+ count = rt6_multipath_dead_count(rt, dev);
+ if (rt->rt6i_nsiblings + 1 == count) {
+ rt6_multipath_flush(rt);
+ return -1;
+ }
+ rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
+ RTNH_F_LINKDOWN);
+ fib6_update_sernum(rt);
+ rt6_multipath_rebalance(rt);
+ }
+ return -2;
+ case NETDEV_CHANGE:
+ if (rt->dst.dev != dev ||
+ rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
+ break;
+ rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+ rt6_multipath_rebalance(rt);
+ break;
+ }
return 0;
}
-void rt6_ifdown(struct net *net, struct net_device *dev)
+void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
{
- struct arg_dev_net adn = {
+ struct arg_netdev_event arg = {
.dev = dev,
- .net = net,
+ {
+ .event = event,
+ },
};
- fib6_clean_all(net, fib6_ifdown, &adn);
- if (dev)
- rt6_uncached_list_flush_dev(net, dev);
+ fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
+}
+
+void rt6_disable_ip(struct net_device *dev, unsigned long event)
+{
+ rt6_sync_down_dev(dev, event);
+ rt6_uncached_list_flush_dev(dev_net(dev), dev);
+ neigh_ifdown(&nd_tbl, dev);
}
struct rt6_mtu_change_arg {
@@ -3602,6 +3893,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rtm->rtm_flags & RTM_F_CLONED)
cfg->fc_flags |= RTF_CACHE;
+ cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
+
cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -3811,6 +4104,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
goto cleanup;
}
+ rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
+
err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
dst_release_immediate(&rt->dst);
@@ -3991,7 +4286,10 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
unsigned int *flags, bool skip_oif)
{
- if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
+ if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+ *flags |= RTNH_F_DEAD;
+
+ if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
*flags |= RTNH_F_LINKDOWN;
if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
*flags |= RTNH_F_DEAD;
@@ -4002,6 +4300,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
goto nla_put_failure;
}
+ *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
*flags |= RTNH_F_OFFLOAD;
@@ -4030,7 +4329,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
if (!rtnh)
goto nla_put_failure;
- rtnh->rtnh_hops = 0;
+ rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
@@ -4297,19 +4596,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- if (!fibmatch)
- dst = ip6_route_input_lookup(net, dev, &fl6, flags);
- else
- dst = ip6_route_lookup(net, &fl6, 0);
+ dst = ip6_route_input_lookup(net, dev, &fl6, flags);
rcu_read_unlock();
} else {
fl6.flowi6_oif = oif;
- if (!fibmatch)
- dst = ip6_route_output(net, NULL, &fl6);
- else
- dst = ip6_route_lookup(net, &fl6, 0);
+ dst = ip6_route_output(net, NULL, &fl6);
}
@@ -4326,6 +4619,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
}
+ if (fibmatch && rt->from) {
+ struct rt6_info *ort = rt->from;
+
+ dst_hold(&ort->dst);
+ ip6_rt_put(rt);
+ rt = ort;
+ }
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb) {
ip6_rt_put(rt);
@@ -4423,7 +4724,6 @@ static int ip6_route_dev_notify(struct notifier_block *this,
#ifdef CONFIG_PROC_FS
static const struct file_operations ipv6_route_proc_fops = {
- .owner = THIS_MODULE,
.open = ipv6_route_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -4451,7 +4751,6 @@ static int rt6_stats_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations rt6_stats_seq_fops = {
- .owner = THIS_MODULE,
.open = rt6_stats_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -4596,8 +4895,6 @@ static int __net_init ip6_route_net_init(struct net *net)
GFP_KERNEL);
if (!net->ipv6.ip6_null_entry)
goto out_ip6_dst_entries;
- net->ipv6.ip6_null_entry->dst.path =
- (struct dst_entry *)net->ipv6.ip6_null_entry;
net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
ip6_template_metrics, true);
@@ -4609,8 +4906,6 @@ static int __net_init ip6_route_net_init(struct net *net)
GFP_KERNEL);
if (!net->ipv6.ip6_prohibit_entry)
goto out_ip6_null_entry;
- net->ipv6.ip6_prohibit_entry->dst.path =
- (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
ip6_template_metrics, true);
@@ -4620,8 +4915,6 @@ static int __net_init ip6_route_net_init(struct net *net)
GFP_KERNEL);
if (!net->ipv6.ip6_blk_hole_entry)
goto out_ip6_prohibit_entry;
- net->ipv6.ip6_blk_hole_entry->dst.path =
- (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
ip6_template_metrics, true);
@@ -4778,11 +5071,20 @@ int __init ip6_route_init(void)
if (ret)
goto fib6_rules_init;
- ret = -ENOBUFS;
- if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
- __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
- __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
- RTNL_FLAG_DOIT_UNLOCKED))
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
+ inet6_rtm_newroute, NULL, 0);
+ if (ret < 0)
+ goto out_register_late_subsys;
+
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
+ inet6_rtm_delroute, NULL, 0);
+ if (ret < 0)
+ goto out_register_late_subsys;
+
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
+ inet6_rtm_getroute, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
+ if (ret < 0)
goto out_register_late_subsys;
ret = register_netdevice_notifier(&ip6_route_dev_notifier);
@@ -4800,6 +5102,7 @@ out:
return ret;
out_register_late_subsys:
+ rtnl_unregister_all(PF_INET6);
unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
fib6_rules_cleanup();
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index c81407770956..7f5621d09571 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -306,9 +306,7 @@ static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
struct seg6_hmac_info *hinfo;
int ret;
- ret = rhashtable_walk_start(iter);
- if (ret && ret != -EAGAIN)
- goto done;
+ rhashtable_walk_start(iter);
for (;;) {
hinfo = rhashtable_walk_next(iter);
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 825b8e01f947..ba3767ef5e93 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -501,7 +501,7 @@ static struct seg6_action_desc *__get_action_desc(int action)
struct seg6_action_desc *desc;
int i, count;
- count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc);
+ count = ARRAY_SIZE(seg6_action_table);
for (i = 0; i < count; i++) {
desc = &seg6_action_table[i];
if (desc->action == action)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d60ddcb0bfe2..3873d3877135 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -934,8 +934,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
df = 0;
}
- if (tunnel->parms.iph.daddr && skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ if (tunnel->parms.iph.daddr)
+ skb_dst_update_pmtu(skb, mtu);
if (skb->len > mtu && !skb_is_gso(skb)) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -1098,6 +1098,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
ipip6_tunnel_link(sitn, t);
t->parms.iph.ttl = p->iph.ttl;
t->parms.iph.tos = p->iph.tos;
+ t->parms.iph.frag_off = p->iph.frag_off;
if (t->parms.link != p->link || t->fwmark != fwmark) {
t->parms.link = p->link;
t->fwmark = fwmark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6bb98c93edfe..a1ab29e2ab3b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -176,8 +176,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
/* If interface is set while binding, indices
* must coincide.
*/
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != usin->sin6_scope_id)
+ if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
return -EINVAL;
sk->sk_bound_dev_if = usin->sin6_scope_id;
@@ -994,7 +993,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
0, 0);
}
@@ -1454,7 +1453,6 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- tcp_v6_fill_cb(skb, hdr, th);
if (tcp_v6_inbound_md5_hash(sk, skb)) {
sk_drops_add(sk, skb);
reqsk_put(req);
@@ -1467,8 +1465,12 @@ process:
sock_hold(sk);
refcounted = true;
nsk = NULL;
- if (!tcp_filter(sk, skb))
+ if (!tcp_filter(sk, skb)) {
+ th = (const struct tcphdr *)skb->data;
+ hdr = ipv6_hdr(skb);
+ tcp_v6_fill_cb(skb, hdr, th);
nsk = tcp_check_req(sk, skb, req, false);
+ }
if (!nsk) {
reqsk_put(req);
goto discard_and_relse;
@@ -1492,8 +1494,6 @@ process:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- tcp_v6_fill_cb(skb, hdr, th);
-
if (tcp_v6_inbound_md5_hash(sk, skb))
goto discard_and_relse;
@@ -1501,6 +1501,7 @@ process:
goto discard_and_relse;
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
+ tcp_v6_fill_cb(skb, hdr, th);
skb->dev = NULL;
@@ -1590,7 +1591,6 @@ do_time_wait:
tcp_v6_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- tcp_v6_restore_cb(skb);
tcp_v6_send_reset(sk, skb);
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
@@ -1794,7 +1794,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
timer_expires = jiffies;
}
- state = sk_state_load(sp);
+ state = inet_sk_state_load(sp);
if (state == TCP_LISTEN)
rx_queue = sp->sk_ack_backlog;
else
@@ -1883,7 +1883,6 @@ out:
}
static const struct file_operations tcp6_afinfo_seq_fops = {
- .owner = THIS_MODULE,
.open = tcp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index d883c9204c01..278e49cd67d4 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
{
struct tcphdr *th;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
+ return ERR_PTR(-EINVAL);
+
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3f30fa313bf2..52e3ea0e6f50 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -89,28 +89,12 @@ static u32 udp6_ehashfn(const struct net *net,
udp_ipv6_hash_secret + net_hash_mix(net));
}
-static u32 udp6_portaddr_hash(const struct net *net,
- const struct in6_addr *addr6,
- unsigned int port)
-{
- unsigned int hash, mix = net_hash_mix(net);
-
- if (ipv6_addr_any(addr6))
- hash = jhash_1word(0, mix);
- else if (ipv6_addr_v4mapped(addr6))
- hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
- else
- hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
-
- return hash ^ port;
-}
-
int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
unsigned int hash2_nulladdr =
- udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
+ ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
unsigned int hash2_partial =
- udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
+ ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -119,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
static void udp_v6_rehash(struct sock *sk)
{
- u16 new_hash = udp6_portaddr_hash(sock_net(sk),
+ u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
&sk->sk_v6_rcv_saddr,
inet_sk(sk)->inet_num);
@@ -184,7 +168,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
struct udp_hslot *hslot2, struct sk_buff *skb)
{
struct sock *sk, *result;
- int score, badness, matches = 0, reuseport = 0;
+ int score, badness;
u32 hash = 0;
result = NULL;
@@ -193,8 +177,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
@@ -202,15 +185,9 @@ static struct sock *udp6_lib_lookup2(struct net *net,
sizeof(struct udphdr));
if (result)
return result;
- matches = 1;
}
result = sk;
badness = score;
- } else if (score == badness && reuseport) {
- matches++;
- if (reciprocal_scale(hash, matches) == 0)
- result = sk;
- hash = next_pseudo_random32(hash);
}
}
return result;
@@ -228,11 +205,11 @@ struct sock *__udp6_lib_lookup(struct net *net,
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
bool exact_dif = udp6_lib_exact_dif_match(net, skb);
- int score, badness, matches = 0, reuseport = 0;
+ int score, badness;
u32 hash = 0;
if (hslot->count > 10) {
- hash2 = udp6_portaddr_hash(net, daddr, hnum);
+ hash2 = ipv6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
@@ -243,7 +220,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
- hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
+ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
/* avoid searching the same slot again. */
if (unlikely(slot2 == old_slot2))
@@ -267,23 +244,16 @@ begin:
score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
sdif, exact_dif);
if (score > badness) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result)
return result;
- matches = 1;
}
result = sk;
badness = score;
- } else if (score == badness && reuseport) {
- matches++;
- if (reciprocal_scale(hash, matches) == 0)
- result = sk;
- hash = next_pseudo_random32(hash);
}
}
return result;
@@ -719,9 +689,9 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct sk_buff *nskb;
if (use_hash2) {
- hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
+ hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) &
udptable->mask;
- hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask;
+ hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
hslot = &udptable->hash2[hash2];
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
@@ -909,7 +879,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
int dif, int sdif)
{
unsigned short hnum = ntohs(loc_port);
- unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
+ unsigned int hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
@@ -1509,7 +1479,6 @@ int udp6_seq_show(struct seq_file *seq, void *v)
}
static const struct file_operations udp6_afinfo_seq_fops = {
- .owner = THIS_MODULE,
.open = udp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index a0f89ad76f9d..2a04dc9c781b 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -42,6 +42,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ goto out;
+
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 2784cc363f2b..14ae32bb1f3d 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -94,7 +94,6 @@ void udplitev6_exit(void)
#ifdef CONFIG_PROC_FS
static const struct file_operations udplite6_afinfo_seq_fops = {
- .owner = THIS_MODULE,
.open = udp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index fe04e23af986..841f4a07438e 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
}
EXPORT_SYMBOL(xfrm6_rcv_spi);
+static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (xfrm_trans_queue(skb, ip6_rcv_finish))
+ __kfree_skb(skb);
+ return -1;
+}
+
int xfrm6_transport_finish(struct sk_buff *skb, int async)
{
struct xfrm_offload *xo = xfrm_offload(skb);
@@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
- ip6_rcv_finish);
+ xfrm6_transport_finish2);
return -1;
}
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 02556e356f87..bb935a3b7fea 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -59,7 +59,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
if (x->props.flags & XFRM_STATE_NOECN)
dsfield &= ~INET_ECN_MASK;
ipv6_change_dsfield(top_iph, 0, dsfield);
- top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
+ top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
return 0;
@@ -92,6 +92,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
@@ -105,17 +106,14 @@ static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x,
{
__skb_push(skb, skb->mac_len);
return skb_mac_gso_segment(skb, features);
-
}
static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
{
struct xfrm_offload *xo = xfrm_offload(skb);
- if (xo->flags & XFRM_GSO_SEGMENT) {
- skb->network_header = skb->network_header - x->props.header_len;
+ if (xo->flags & XFRM_GSO_SEGMENT)
skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
- }
skb_reset_mac_len(skb);
pskb_pull(skb, skb->mac_len + x->props.header_len);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 885ade234a49..09fb44ee3b45 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -265,7 +265,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
in6_dev_put(xdst->u.rt6.rt6i_idev);
xdst->u.rt6.rt6i_idev = loopback_idev;
in6_dev_hold(loopback_idev);
- xdst = (struct xfrm_dst *)xdst->u.dst.child;
+ xdst = (struct xfrm_dst *)xfrm_dst_child(&xdst->u.dst);
} while (xdst->u.dst.xfrm);
__in6_dev_put(loopback_idev);
diff --git a/net/ipx/Kconfig b/net/ipx/Kconfig
deleted file mode 100644
index e9ad0062fbb6..000000000000
--- a/net/ipx/Kconfig
+++ /dev/null
@@ -1,60 +0,0 @@
-#
-# IPX configuration
-#
-config IPX
- tristate "The IPX protocol"
- select LLC
- ---help---
- This is support for the Novell networking protocol, IPX, commonly
- used for local networks of Windows machines. You need it if you
- want to access Novell NetWare file or print servers using the Linux
- Novell client ncpfs (available from
- <ftp://platan.vc.cvut.cz/pub/linux/ncpfs/>) or from
- within the Linux DOS emulator DOSEMU (read the DOSEMU-HOWTO,
- available from <http://www.tldp.org/docs.html#howto>). In order
- to do the former, you'll also have to say Y to "NCP file system
- support", below.
-
- IPX is similar in scope to IP, while SPX, which runs on top of IPX,
- is similar to TCP.
-
- To turn your Linux box into a fully featured NetWare file server and
- IPX router, say Y here and fetch either lwared from
- <ftp://ibiblio.org/pub/Linux/system/network/daemons/> or
- mars_nwe from <ftp://www.compu-art.de/mars_nwe/>. For more
- information, read the IPX-HOWTO available from
- <http://www.tldp.org/docs.html#howto>.
-
- The IPX driver would enlarge your kernel by about 16 KB. To compile
- this driver as a module, choose M here: the module will be called ipx.
- Unless you want to integrate your Linux box with a local Novell
- network, say N.
-
-config IPX_INTERN
- bool "IPX: Full internal IPX network"
- depends on IPX
- ---help---
- Every IPX network has an address that identifies it. Sometimes it is
- useful to give an IPX "network" address to your Linux box as well
- (for example if your box is acting as a file server for different
- IPX networks: it will then be accessible from everywhere using the
- same address). The way this is done is to create a virtual internal
- "network" inside your box and to assign an IPX address to this
- network. Say Y here if you want to do this; read the IPX-HOWTO at
- <http://www.tldp.org/docs.html#howto> for details.
-
- The full internal IPX network enables you to allocate sockets on
- different virtual nodes of the internal network. This is done by
- evaluating the field sipx_node of the socket address given to the
- bind call. So applications should always initialize the node field
- to 0 when binding a socket on the primary network. In this case the
- socket is assigned the default node that has been given to the
- kernel when the internal network was created. By enabling the full
- internal IPX network the cross-forwarding of packets targeted at
- 'special' sockets to sockets listening on the primary network is
- disabled. This might break existing applications, especially RIP/SAP
- daemons. A RIP/SAP daemon that works well with the full internal net
- can be found on <ftp://ftp.gwdg.de/pub/linux/misc/ncpfs/>.
-
- If you don't know what you are doing, say N.
-
diff --git a/net/ipx/Makefile b/net/ipx/Makefile
deleted file mode 100644
index 440fafa9fd07..000000000000
--- a/net/ipx/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-#
-# Makefile for the Linux IPX layer.
-#
-
-obj-$(CONFIG_IPX) += ipx.o
-
-ipx-y := af_ipx.o ipx_route.o ipx_proc.o pe2.o
-ipx-$(CONFIG_SYSCTL) += sysctl_net_ipx.o
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
deleted file mode 100644
index d21a9d128d3e..000000000000
--- a/net/ipx/af_ipx.c
+++ /dev/null
@@ -1,2084 +0,0 @@
-/*
- * Implements an IPX socket layer.
- *
- * This code is derived from work by
- * Ross Biro : Writing the original IP stack
- * Fred Van Kempen : Tidying up the TCP/IP
- *
- * Many thanks go to Keith Baker, Institute For Industrial Information
- * Technology Ltd, Swansea University for allowing me to work on this
- * in my own time even though it was in some ways related to commercial
- * work I am currently employed to do there.
- *
- * All the material in this file is subject to the Gnu license version 2.
- * Neither Alan Cox nor the Swansea University Computer Society admit
- * liability nor provide warranty for any of this software. This material
- * is provided as is and at no charge.
- *
- * Portions Copyright (c) 2000-2003 Conectiva, Inc. <acme@conectiva.com.br>
- * Neither Arnaldo Carvalho de Melo nor Conectiva, Inc. admit liability nor
- * provide warranty for any of this software. This material is provided
- * "AS-IS" and at no charge.
- *
- * Portions Copyright (c) 1995 Caldera, Inc. <greg@caldera.com>
- * Neither Greg Page nor Caldera, Inc. admit liability nor provide
- * warranty for any of this software. This material is provided
- * "AS-IS" and at no charge.
- *
- * See net/ipx/ChangeLog.
- */
-
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/if_arp.h>
-#include <linux/if_ether.h>
-#include <linux/init.h>
-#include <linux/ipx.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/uio.h>
-#include <linux/slab.h>
-#include <linux/skbuff.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/termios.h>
-
-#include <net/ipx.h>
-#include <net/p8022.h>
-#include <net/psnap.h>
-#include <net/sock.h>
-#include <net/datalink.h>
-#include <net/tcp_states.h>
-#include <net/net_namespace.h>
-
-#include <linux/uaccess.h>
-
-/* Configuration Variables */
-static unsigned char ipxcfg_max_hops = 16;
-static char ipxcfg_auto_select_primary;
-static char ipxcfg_auto_create_interfaces;
-int sysctl_ipx_pprop_broadcasting = 1;
-
-/* Global Variables */
-static struct datalink_proto *p8022_datalink;
-static struct datalink_proto *pEII_datalink;
-static struct datalink_proto *p8023_datalink;
-static struct datalink_proto *pSNAP_datalink;
-
-static const struct proto_ops ipx_dgram_ops;
-
-LIST_HEAD(ipx_interfaces);
-DEFINE_SPINLOCK(ipx_interfaces_lock);
-
-struct ipx_interface *ipx_primary_net;
-struct ipx_interface *ipx_internal_net;
-
-struct ipx_interface *ipx_interfaces_head(void)
-{
- struct ipx_interface *rc = NULL;
-
- if (!list_empty(&ipx_interfaces))
- rc = list_entry(ipx_interfaces.next,
- struct ipx_interface, node);
- return rc;
-}
-
-static void ipxcfg_set_auto_select(char val)
-{
- ipxcfg_auto_select_primary = val;
- if (val && !ipx_primary_net)
- ipx_primary_net = ipx_interfaces_head();
-}
-
-static int ipxcfg_get_config_data(struct ipx_config_data __user *arg)
-{
- struct ipx_config_data vals;
-
- vals.ipxcfg_auto_create_interfaces = ipxcfg_auto_create_interfaces;
- vals.ipxcfg_auto_select_primary = ipxcfg_auto_select_primary;
-
- return copy_to_user(arg, &vals, sizeof(vals)) ? -EFAULT : 0;
-}
-
-/*
- * Note: Sockets may not be removed _during_ an interrupt or inet_bh
- * handler using this technique. They can be added although we do not
- * use this facility.
- */
-
-static void ipx_remove_socket(struct sock *sk)
-{
- /* Determine interface with which socket is associated */
- struct ipx_interface *intrfc = ipx_sk(sk)->intrfc;
-
- if (!intrfc)
- goto out;
-
- ipxitf_hold(intrfc);
- spin_lock_bh(&intrfc->if_sklist_lock);
- sk_del_node_init(sk);
- spin_unlock_bh(&intrfc->if_sklist_lock);
- ipxitf_put(intrfc);
-out:
- return;
-}
-
-static void ipx_destroy_socket(struct sock *sk)
-{
- ipx_remove_socket(sk);
- skb_queue_purge(&sk->sk_receive_queue);
- sk_refcnt_debug_dec(sk);
-}
-
-/*
- * The following code is used to support IPX Interfaces (IPXITF). An
- * IPX interface is defined by a physical device and a frame type.
- */
-
-/* ipxitf_clear_primary_net has to be called with ipx_interfaces_lock held */
-
-static void ipxitf_clear_primary_net(void)
-{
- ipx_primary_net = NULL;
- if (ipxcfg_auto_select_primary)
- ipx_primary_net = ipx_interfaces_head();
-}
-
-static struct ipx_interface *__ipxitf_find_using_phys(struct net_device *dev,
- __be16 datalink)
-{
- struct ipx_interface *i;
-
- list_for_each_entry(i, &ipx_interfaces, node)
- if (i->if_dev == dev && i->if_dlink_type == datalink)
- goto out;
- i = NULL;
-out:
- return i;
-}
-
-static struct ipx_interface *ipxitf_find_using_phys(struct net_device *dev,
- __be16 datalink)
-{
- struct ipx_interface *i;
-
- spin_lock_bh(&ipx_interfaces_lock);
- i = __ipxitf_find_using_phys(dev, datalink);
- if (i)
- ipxitf_hold(i);
- spin_unlock_bh(&ipx_interfaces_lock);
- return i;
-}
-
-struct ipx_interface *ipxitf_find_using_net(__be32 net)
-{
- struct ipx_interface *i;
-
- spin_lock_bh(&ipx_interfaces_lock);
- if (net) {
- list_for_each_entry(i, &ipx_interfaces, node)
- if (i->if_netnum == net)
- goto hold;
- i = NULL;
- goto unlock;
- }
-
- i = ipx_primary_net;
- if (i)
-hold:
- ipxitf_hold(i);
-unlock:
- spin_unlock_bh(&ipx_interfaces_lock);
- return i;
-}
-
-/* Sockets are bound to a particular IPX interface. */
-static void ipxitf_insert_socket(struct ipx_interface *intrfc, struct sock *sk)
-{
- ipxitf_hold(intrfc);
- spin_lock_bh(&intrfc->if_sklist_lock);
- ipx_sk(sk)->intrfc = intrfc;
- sk_add_node(sk, &intrfc->if_sklist);
- spin_unlock_bh(&intrfc->if_sklist_lock);
- ipxitf_put(intrfc);
-}
-
-/* caller must hold intrfc->if_sklist_lock */
-static struct sock *__ipxitf_find_socket(struct ipx_interface *intrfc,
- __be16 port)
-{
- struct sock *s;
-
- sk_for_each(s, &intrfc->if_sklist)
- if (ipx_sk(s)->port == port)
- goto found;
- s = NULL;
-found:
- return s;
-}
-
-/* caller must hold a reference to intrfc */
-static struct sock *ipxitf_find_socket(struct ipx_interface *intrfc,
- __be16 port)
-{
- struct sock *s;
-
- spin_lock_bh(&intrfc->if_sklist_lock);
- s = __ipxitf_find_socket(intrfc, port);
- if (s)
- sock_hold(s);
- spin_unlock_bh(&intrfc->if_sklist_lock);
-
- return s;
-}
-
-#ifdef CONFIG_IPX_INTERN
-static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc,
- unsigned char *ipx_node,
- __be16 port)
-{
- struct sock *s;
-
- ipxitf_hold(intrfc);
- spin_lock_bh(&intrfc->if_sklist_lock);
-
- sk_for_each(s, &intrfc->if_sklist) {
- struct ipx_sock *ipxs = ipx_sk(s);
-
- if (ipxs->port == port &&
- !memcmp(ipx_node, ipxs->node, IPX_NODE_LEN))
- goto found;
- }
- s = NULL;
-found:
- spin_unlock_bh(&intrfc->if_sklist_lock);
- ipxitf_put(intrfc);
- return s;
-}
-#endif
-
-static void __ipxitf_down(struct ipx_interface *intrfc)
-{
- struct sock *s;
- struct hlist_node *t;
-
- /* Delete all routes associated with this interface */
- ipxrtr_del_routes(intrfc);
-
- spin_lock_bh(&intrfc->if_sklist_lock);
- /* error sockets */
- sk_for_each_safe(s, t, &intrfc->if_sklist) {
- struct ipx_sock *ipxs = ipx_sk(s);
-
- s->sk_err = ENOLINK;
- s->sk_error_report(s);
- ipxs->intrfc = NULL;
- ipxs->port = 0;
- sock_set_flag(s, SOCK_ZAPPED); /* Indicates it is no longer bound */
- sk_del_node_init(s);
- }
- INIT_HLIST_HEAD(&intrfc->if_sklist);
- spin_unlock_bh(&intrfc->if_sklist_lock);
-
- /* remove this interface from list */
- list_del(&intrfc->node);
-
- /* remove this interface from *special* networks */
- if (intrfc == ipx_primary_net)
- ipxitf_clear_primary_net();
- if (intrfc == ipx_internal_net)
- ipx_internal_net = NULL;
-
- if (intrfc->if_dev)
- dev_put(intrfc->if_dev);
- kfree(intrfc);
-}
-
-void ipxitf_down(struct ipx_interface *intrfc)
-{
- spin_lock_bh(&ipx_interfaces_lock);
- __ipxitf_down(intrfc);
- spin_unlock_bh(&ipx_interfaces_lock);
-}
-
-static void __ipxitf_put(struct ipx_interface *intrfc)
-{
- if (refcount_dec_and_test(&intrfc->refcnt))
- __ipxitf_down(intrfc);
-}
-
-static int ipxitf_device_event(struct notifier_block *notifier,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct ipx_interface *i, *tmp;
-
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- if (event != NETDEV_DOWN && event != NETDEV_UP)
- goto out;
-
- spin_lock_bh(&ipx_interfaces_lock);
- list_for_each_entry_safe(i, tmp, &ipx_interfaces, node)
- if (i->if_dev == dev) {
- if (event == NETDEV_UP)
- ipxitf_hold(i);
- else
- __ipxitf_put(i);
- }
- spin_unlock_bh(&ipx_interfaces_lock);
-out:
- return NOTIFY_DONE;
-}
-
-
-static __exit void ipxitf_cleanup(void)
-{
- struct ipx_interface *i, *tmp;
-
- spin_lock_bh(&ipx_interfaces_lock);
- list_for_each_entry_safe(i, tmp, &ipx_interfaces, node)
- __ipxitf_put(i);
- spin_unlock_bh(&ipx_interfaces_lock);
-}
-
-static void ipxitf_def_skb_handler(struct sock *sock, struct sk_buff *skb)
-{
- if (sock_queue_rcv_skb(sock, skb) < 0)
- kfree_skb(skb);
-}
-
-/*
- * On input skb->sk is NULL. Nobody is charged for the memory.
- */
-
-/* caller must hold a reference to intrfc */
-
-#ifdef CONFIG_IPX_INTERN
-static int ipxitf_demux_socket(struct ipx_interface *intrfc,
- struct sk_buff *skb, int copy)
-{
- struct ipxhdr *ipx = ipx_hdr(skb);
- int is_broadcast = !memcmp(ipx->ipx_dest.node, ipx_broadcast_node,
- IPX_NODE_LEN);
- struct sock *s;
- int rc;
-
- spin_lock_bh(&intrfc->if_sklist_lock);
-
- sk_for_each(s, &intrfc->if_sklist) {
- struct ipx_sock *ipxs = ipx_sk(s);
-
- if (ipxs->port == ipx->ipx_dest.sock &&
- (is_broadcast || !memcmp(ipx->ipx_dest.node,
- ipxs->node, IPX_NODE_LEN))) {
- /* We found a socket to which to send */
- struct sk_buff *skb1;
-
- if (copy) {
- skb1 = skb_clone(skb, GFP_ATOMIC);
- rc = -ENOMEM;
- if (!skb1)
- goto out;
- } else {
- skb1 = skb;
- copy = 1; /* skb may only be used once */
- }
- ipxitf_def_skb_handler(s, skb1);
-
- /* On an external interface, one socket can listen */
- if (intrfc != ipx_internal_net)
- break;
- }
- }
-
- /* skb was solely for us, and we did not make a copy, so free it. */
- if (!copy)
- kfree_skb(skb);
-
- rc = 0;
-out:
- spin_unlock_bh(&intrfc->if_sklist_lock);
- return rc;
-}
-#else
-static struct sock *ncp_connection_hack(struct ipx_interface *intrfc,
- struct ipxhdr *ipx)
-{
- /* The packet's target is a NCP connection handler. We want to hand it
- * to the correct socket directly within the kernel, so that the
- * mars_nwe packet distribution process does not have to do it. Here we
- * only care about NCP and BURST packets.
- *
- * You might call this a hack, but believe me, you do not want a
- * complete NCP layer in the kernel, and this is VERY fast as well. */
- struct sock *sk = NULL;
- int connection = 0;
- u8 *ncphdr = (u8 *)(ipx + 1);
-
- if (*ncphdr == 0x22 && *(ncphdr + 1) == 0x22) /* NCP request */
- connection = (((int) *(ncphdr + 5)) << 8) | (int) *(ncphdr + 3);
- else if (*ncphdr == 0x77 && *(ncphdr + 1) == 0x77) /* BURST packet */
- connection = (((int) *(ncphdr + 9)) << 8) | (int) *(ncphdr + 8);
-
- if (connection) {
- /* Now we have to look for a special NCP connection handling
- * socket. Only these sockets have ipx_ncp_conn != 0, set by
- * SIOCIPXNCPCONN. */
- spin_lock_bh(&intrfc->if_sklist_lock);
- sk_for_each(sk, &intrfc->if_sklist)
- if (ipx_sk(sk)->ipx_ncp_conn == connection) {
- sock_hold(sk);
- goto found;
- }
- sk = NULL;
- found:
- spin_unlock_bh(&intrfc->if_sklist_lock);
- }
- return sk;
-}
-
-static int ipxitf_demux_socket(struct ipx_interface *intrfc,
- struct sk_buff *skb, int copy)
-{
- struct ipxhdr *ipx = ipx_hdr(skb);
- struct sock *sock1 = NULL, *sock2 = NULL;
- struct sk_buff *skb1 = NULL, *skb2 = NULL;
- int rc;
-
- if (intrfc == ipx_primary_net && ntohs(ipx->ipx_dest.sock) == 0x451)
- sock1 = ncp_connection_hack(intrfc, ipx);
- if (!sock1)
- /* No special socket found, forward the packet the normal way */
- sock1 = ipxitf_find_socket(intrfc, ipx->ipx_dest.sock);
-
- /*
- * We need to check if there is a primary net and if
- * this is addressed to one of the *SPECIAL* sockets because
- * these need to be propagated to the primary net.
- * The *SPECIAL* socket list contains: 0x452(SAP), 0x453(RIP) and
- * 0x456(Diagnostic).
- */
-
- if (ipx_primary_net && intrfc != ipx_primary_net) {
- const int dsock = ntohs(ipx->ipx_dest.sock);
-
- if (dsock == 0x452 || dsock == 0x453 || dsock == 0x456)
- /* The appropriate thing to do here is to dup the
- * packet and route to the primary net interface via
- * ipxitf_send; however, we'll cheat and just demux it
- * here. */
- sock2 = ipxitf_find_socket(ipx_primary_net,
- ipx->ipx_dest.sock);
- }
-
- /*
- * If there is nothing to do return. The kfree will cancel any charging.
- */
- rc = 0;
- if (!sock1 && !sock2) {
- if (!copy)
- kfree_skb(skb);
- goto out;
- }
-
- /*
- * This next segment of code is a little awkward, but it sets it up
- * so that the appropriate number of copies of the SKB are made and
- * that skb1 and skb2 point to it (them) so that it (they) can be
- * demuxed to sock1 and/or sock2. If we are unable to make enough
- * copies, we do as much as is possible.
- */
-
- if (copy)
- skb1 = skb_clone(skb, GFP_ATOMIC);
- else
- skb1 = skb;
-
- rc = -ENOMEM;
- if (!skb1)
- goto out_put;
-
- /* Do we need 2 SKBs? */
- if (sock1 && sock2)
- skb2 = skb_clone(skb1, GFP_ATOMIC);
- else
- skb2 = skb1;
-
- if (sock1)
- ipxitf_def_skb_handler(sock1, skb1);
-
- if (!skb2)
- goto out_put;
-
- if (sock2)
- ipxitf_def_skb_handler(sock2, skb2);
-
- rc = 0;
-out_put:
- if (sock1)
- sock_put(sock1);
- if (sock2)
- sock_put(sock2);
-out:
- return rc;
-}
-#endif /* CONFIG_IPX_INTERN */
-
-static struct sk_buff *ipxitf_adjust_skbuff(struct ipx_interface *intrfc,
- struct sk_buff *skb)
-{
- struct sk_buff *skb2;
- int in_offset = (unsigned char *)ipx_hdr(skb) - skb->head;
- int out_offset = intrfc->if_ipx_offset;
- int len;
-
- /* Hopefully, most cases */
- if (in_offset >= out_offset)
- return skb;
-
- /* Need new SKB */
- len = skb->len + out_offset;
- skb2 = alloc_skb(len, GFP_ATOMIC);
- if (skb2) {
- skb_reserve(skb2, out_offset);
- skb_reset_network_header(skb2);
- skb_reset_transport_header(skb2);
- skb_put(skb2, skb->len);
- memcpy(ipx_hdr(skb2), ipx_hdr(skb), skb->len);
- memcpy(skb2->cb, skb->cb, sizeof(skb->cb));
- }
- kfree_skb(skb);
- return skb2;
-}
-
-/* caller must hold a reference to intrfc and the skb has to be unshared */
-int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node)
-{
- struct ipxhdr *ipx = ipx_hdr(skb);
- struct net_device *dev = intrfc->if_dev;
- struct datalink_proto *dl = intrfc->if_dlink;
- char dest_node[IPX_NODE_LEN];
- int send_to_wire = 1;
- int addr_len;
-
- ipx->ipx_tctrl = IPX_SKB_CB(skb)->ipx_tctrl;
- ipx->ipx_dest.net = IPX_SKB_CB(skb)->ipx_dest_net;
- ipx->ipx_source.net = IPX_SKB_CB(skb)->ipx_source_net;
-
- /* see if we need to include the netnum in the route list */
- if (IPX_SKB_CB(skb)->last_hop.index >= 0) {
- __be32 *last_hop = (__be32 *)(((u8 *) skb->data) +
- sizeof(struct ipxhdr) +
- IPX_SKB_CB(skb)->last_hop.index *
- sizeof(__be32));
- *last_hop = IPX_SKB_CB(skb)->last_hop.netnum;
- IPX_SKB_CB(skb)->last_hop.index = -1;
- }
-
- /*
- * We need to know how many skbuffs it will take to send out this
- * packet to avoid unnecessary copies.
- */
-
- if (!dl || !dev || dev->flags & IFF_LOOPBACK)
- send_to_wire = 0; /* No non looped */
-
- /*
- * See if this should be demuxed to sockets on this interface
- *
- * We want to ensure the original was eaten or that we only use
- * up clones.
- */
-
- if (ipx->ipx_dest.net == intrfc->if_netnum) {
- /*
- * To our own node, loop and free the original.
- * The internal net will receive on all node address.
- */
- if (intrfc == ipx_internal_net ||
- !memcmp(intrfc->if_node, node, IPX_NODE_LEN)) {
- /* Don't charge sender */
- skb_orphan(skb);
-
- /* Will charge receiver */
- return ipxitf_demux_socket(intrfc, skb, 0);
- }
-
- /* Broadcast, loop and possibly keep to send on. */
- if (!memcmp(ipx_broadcast_node, node, IPX_NODE_LEN)) {
- if (!send_to_wire)
- skb_orphan(skb);
- ipxitf_demux_socket(intrfc, skb, send_to_wire);
- if (!send_to_wire)
- goto out;
- }
- }
-
- /*
- * If the originating net is not equal to our net; this is routed
- * We are still charging the sender. Which is right - the driver
- * free will handle this fairly.
- */
- if (ipx->ipx_source.net != intrfc->if_netnum) {
- /*
- * Unshare the buffer before modifying the count in
- * case it's a flood or tcpdump
- */
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (!skb)
- goto out;
- if (++ipx->ipx_tctrl > ipxcfg_max_hops)
- send_to_wire = 0;
- }
-
- if (!send_to_wire) {
- kfree_skb(skb);
- goto out;
- }
-
- /* Determine the appropriate hardware address */
- addr_len = dev->addr_len;
- if (!memcmp(ipx_broadcast_node, node, IPX_NODE_LEN))
- memcpy(dest_node, dev->broadcast, addr_len);
- else
- memcpy(dest_node, &(node[IPX_NODE_LEN-addr_len]), addr_len);
-
- /* Make any compensation for differing physical/data link size */
- skb = ipxitf_adjust_skbuff(intrfc, skb);
- if (!skb)
- goto out;
-
- /* set up data link and physical headers */
- skb->dev = dev;
- skb->protocol = htons(ETH_P_IPX);
-
- /* Send it out */
- dl->request(dl, skb, dest_node);
-out:
- return 0;
-}
-
-static int ipxitf_add_local_route(struct ipx_interface *intrfc)
-{
- return ipxrtr_add_route(intrfc->if_netnum, intrfc, NULL);
-}
-
-static void ipxitf_discover_netnum(struct ipx_interface *intrfc,
- struct sk_buff *skb);
-static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb);
-
-static int ipxitf_rcv(struct ipx_interface *intrfc, struct sk_buff *skb)
-{
- struct ipxhdr *ipx = ipx_hdr(skb);
- int rc = 0;
-
- ipxitf_hold(intrfc);
-
- /* See if we should update our network number */
- if (!intrfc->if_netnum) /* net number of intrfc not known yet */
- ipxitf_discover_netnum(intrfc, skb);
-
- IPX_SKB_CB(skb)->last_hop.index = -1;
- if (ipx->ipx_type == IPX_TYPE_PPROP) {
- rc = ipxitf_pprop(intrfc, skb);
- if (rc)
- goto out_free_skb;
- }
-
- /* local processing follows */
- if (!IPX_SKB_CB(skb)->ipx_dest_net)
- IPX_SKB_CB(skb)->ipx_dest_net = intrfc->if_netnum;
- if (!IPX_SKB_CB(skb)->ipx_source_net)
- IPX_SKB_CB(skb)->ipx_source_net = intrfc->if_netnum;
-
- /* it doesn't make sense to route a pprop packet, there's no meaning
- * in the ipx_dest_net for such packets */
- if (ipx->ipx_type != IPX_TYPE_PPROP &&
- intrfc->if_netnum != IPX_SKB_CB(skb)->ipx_dest_net) {
- /* We only route point-to-point packets. */
- if (skb->pkt_type == PACKET_HOST) {
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb)
- rc = ipxrtr_route_skb(skb);
- goto out_intrfc;
- }
-
- goto out_free_skb;
- }
-
- /* see if we should keep it */
- if (!memcmp(ipx_broadcast_node, ipx->ipx_dest.node, IPX_NODE_LEN) ||
- !memcmp(intrfc->if_node, ipx->ipx_dest.node, IPX_NODE_LEN)) {
- rc = ipxitf_demux_socket(intrfc, skb, 0);
- goto out_intrfc;
- }
-
- /* we couldn't pawn it off so unload it */
-out_free_skb:
- kfree_skb(skb);
-out_intrfc:
- ipxitf_put(intrfc);
- return rc;
-}
-
-static void ipxitf_discover_netnum(struct ipx_interface *intrfc,
- struct sk_buff *skb)
-{
- const struct ipx_cb *cb = IPX_SKB_CB(skb);
-
- /* see if this is an intra packet: source_net == dest_net */
- if (cb->ipx_source_net == cb->ipx_dest_net && cb->ipx_source_net) {
- struct ipx_interface *i =
- ipxitf_find_using_net(cb->ipx_source_net);
- /* NB: NetWare servers lie about their hop count so we
- * dropped the test based on it. This is the best way
- * to determine this is a 0 hop count packet. */
- if (!i) {
- intrfc->if_netnum = cb->ipx_source_net;
- ipxitf_add_local_route(intrfc);
- } else {
- printk(KERN_WARNING "IPX: Network number collision "
- "%lx\n %s %s and %s %s\n",
- (unsigned long) ntohl(cb->ipx_source_net),
- ipx_device_name(i),
- ipx_frame_name(i->if_dlink_type),
- ipx_device_name(intrfc),
- ipx_frame_name(intrfc->if_dlink_type));
- ipxitf_put(i);
- }
- }
-}
-
-/**
- * ipxitf_pprop - Process packet propagation IPX packet type 0x14, used for
- * NetBIOS broadcasts
- * @intrfc: IPX interface receiving this packet
- * @skb: Received packet
- *
- * Checks if packet is valid: if its more than %IPX_MAX_PPROP_HOPS hops or if it
- * is smaller than a IPX header + the room for %IPX_MAX_PPROP_HOPS hops we drop
- * it, not even processing it locally, if it has exact %IPX_MAX_PPROP_HOPS we
- * don't broadcast it, but process it locally. See chapter 5 of Novell's "IPX
- * RIP and SAP Router Specification", Part Number 107-000029-001.
- *
- * If it is valid, check if we have pprop broadcasting enabled by the user,
- * if not, just return zero for local processing.
- *
- * If it is enabled check the packet and don't broadcast it if we have already
- * seen this packet.
- *
- * Broadcast: send it to the interfaces that aren't on the packet visited nets
- * array, just after the IPX header.
- *
- * Returns -EINVAL for invalid packets, so that the calling function drops
- * the packet without local processing. 0 if packet is to be locally processed.
- */
-static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb)
-{
- struct ipxhdr *ipx = ipx_hdr(skb);
- int i, rc = -EINVAL;
- struct ipx_interface *ifcs;
- char *c;
- __be32 *l;
-
- /* Illegal packet - too many hops or too short */
- /* We decide to throw it away: no broadcasting, no local processing.
- * NetBIOS unaware implementations route them as normal packets -
- * tctrl <= 15, any data payload... */
- if (IPX_SKB_CB(skb)->ipx_tctrl > IPX_MAX_PPROP_HOPS ||
- ntohs(ipx->ipx_pktsize) < sizeof(struct ipxhdr) +
- IPX_MAX_PPROP_HOPS * sizeof(u32))
- goto out;
- /* are we broadcasting this damn thing? */
- rc = 0;
- if (!sysctl_ipx_pprop_broadcasting)
- goto out;
- /* We do broadcast packet on the IPX_MAX_PPROP_HOPS hop, but we
- * process it locally. All previous hops broadcasted it, and process it
- * locally. */
- if (IPX_SKB_CB(skb)->ipx_tctrl == IPX_MAX_PPROP_HOPS)
- goto out;
-
- c = ((u8 *) ipx) + sizeof(struct ipxhdr);
- l = (__be32 *) c;
-
- /* Don't broadcast packet if already seen this net */
- for (i = 0; i < IPX_SKB_CB(skb)->ipx_tctrl; i++)
- if (*l++ == intrfc->if_netnum)
- goto out;
-
- /* < IPX_MAX_PPROP_HOPS hops && input interface not in list. Save the
- * position where we will insert recvd netnum into list, later on,
- * in ipxitf_send */
- IPX_SKB_CB(skb)->last_hop.index = i;
- IPX_SKB_CB(skb)->last_hop.netnum = intrfc->if_netnum;
- /* xmit on all other interfaces... */
- spin_lock_bh(&ipx_interfaces_lock);
- list_for_each_entry(ifcs, &ipx_interfaces, node) {
- /* Except unconfigured interfaces */
- if (!ifcs->if_netnum)
- continue;
-
- /* That aren't in the list */
- if (ifcs == intrfc)
- continue;
- l = (__be32 *) c;
- /* don't consider the last entry in the packet list,
- * it is our netnum, and it is not there yet */
- for (i = 0; i < IPX_SKB_CB(skb)->ipx_tctrl; i++)
- if (ifcs->if_netnum == *l++)
- break;
- if (i == IPX_SKB_CB(skb)->ipx_tctrl) {
- struct sk_buff *s = skb_copy(skb, GFP_ATOMIC);
-
- if (s) {
- IPX_SKB_CB(s)->ipx_dest_net = ifcs->if_netnum;
- ipxrtr_route_skb(s);
- }
- }
- }
- spin_unlock_bh(&ipx_interfaces_lock);
-out:
- return rc;
-}
-
-static void ipxitf_insert(struct ipx_interface *intrfc)
-{
- spin_lock_bh(&ipx_interfaces_lock);
- list_add_tail(&intrfc->node, &ipx_interfaces);
- spin_unlock_bh(&ipx_interfaces_lock);
-
- if (ipxcfg_auto_select_primary && !ipx_primary_net)
- ipx_primary_net = intrfc;
-}
-
-static struct ipx_interface *ipxitf_alloc(struct net_device *dev, __be32 netnum,
- __be16 dlink_type,
- struct datalink_proto *dlink,
- unsigned char internal,
- int ipx_offset)
-{
- struct ipx_interface *intrfc = kmalloc(sizeof(*intrfc), GFP_ATOMIC);
-
- if (intrfc) {
- intrfc->if_dev = dev;
- intrfc->if_netnum = netnum;
- intrfc->if_dlink_type = dlink_type;
- intrfc->if_dlink = dlink;
- intrfc->if_internal = internal;
- intrfc->if_ipx_offset = ipx_offset;
- intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET;
- INIT_HLIST_HEAD(&intrfc->if_sklist);
- refcount_set(&intrfc->refcnt, 1);
- spin_lock_init(&intrfc->if_sklist_lock);
- }
-
- return intrfc;
-}
-
-static int ipxitf_create_internal(struct ipx_interface_definition *idef)
-{
- struct ipx_interface *intrfc;
- int rc = -EEXIST;
-
- /* Only one primary network allowed */
- if (ipx_primary_net)
- goto out;
-
- /* Must have a valid network number */
- rc = -EADDRNOTAVAIL;
- if (!idef->ipx_network)
- goto out;
- intrfc = ipxitf_find_using_net(idef->ipx_network);
- rc = -EADDRINUSE;
- if (intrfc) {
- ipxitf_put(intrfc);
- goto out;
- }
- intrfc = ipxitf_alloc(NULL, idef->ipx_network, 0, NULL, 1, 0);
- rc = -EAGAIN;
- if (!intrfc)
- goto out;
- memcpy((char *)&(intrfc->if_node), idef->ipx_node, IPX_NODE_LEN);
- ipx_internal_net = ipx_primary_net = intrfc;
- ipxitf_hold(intrfc);
- ipxitf_insert(intrfc);
-
- rc = ipxitf_add_local_route(intrfc);
- ipxitf_put(intrfc);
-out:
- return rc;
-}
-
-static __be16 ipx_map_frame_type(unsigned char type)
-{
- __be16 rc = 0;
-
- switch (type) {
- case IPX_FRAME_ETHERII: rc = htons(ETH_P_IPX); break;
- case IPX_FRAME_8022: rc = htons(ETH_P_802_2); break;
- case IPX_FRAME_SNAP: rc = htons(ETH_P_SNAP); break;
- case IPX_FRAME_8023: rc = htons(ETH_P_802_3); break;
- }
-
- return rc;
-}
-
-static int ipxitf_create(struct ipx_interface_definition *idef)
-{
- struct net_device *dev;
- __be16 dlink_type = 0;
- struct datalink_proto *datalink = NULL;
- struct ipx_interface *intrfc;
- int rc;
-
- if (idef->ipx_special == IPX_INTERNAL) {
- rc = ipxitf_create_internal(idef);
- goto out;
- }
-
- rc = -EEXIST;
- if (idef->ipx_special == IPX_PRIMARY && ipx_primary_net)
- goto out;
-
- intrfc = ipxitf_find_using_net(idef->ipx_network);
- rc = -EADDRINUSE;
- if (idef->ipx_network && intrfc) {
- ipxitf_put(intrfc);
- goto out;
- }
-
- if (intrfc)
- ipxitf_put(intrfc);
-
- dev = dev_get_by_name(&init_net, idef->ipx_device);
- rc = -ENODEV;
- if (!dev)
- goto out;
-
- switch (idef->ipx_dlink_type) {
- case IPX_FRAME_8022:
- dlink_type = htons(ETH_P_802_2);
- datalink = p8022_datalink;
- break;
- case IPX_FRAME_ETHERII:
- if (dev->type != ARPHRD_IEEE802) {
- dlink_type = htons(ETH_P_IPX);
- datalink = pEII_datalink;
- break;
- }
- /* fall through */
- case IPX_FRAME_SNAP:
- dlink_type = htons(ETH_P_SNAP);
- datalink = pSNAP_datalink;
- break;
- case IPX_FRAME_8023:
- dlink_type = htons(ETH_P_802_3);
- datalink = p8023_datalink;
- break;
- case IPX_FRAME_NONE:
- default:
- rc = -EPROTONOSUPPORT;
- goto out_dev;
- }
-
- rc = -ENETDOWN;
- if (!(dev->flags & IFF_UP))
- goto out_dev;
-
- /* Check addresses are suitable */
- rc = -EINVAL;
- if (dev->addr_len > IPX_NODE_LEN)
- goto out_dev;
-
- intrfc = ipxitf_find_using_phys(dev, dlink_type);
- if (!intrfc) {
- /* Ok now create */
- intrfc = ipxitf_alloc(dev, idef->ipx_network, dlink_type,
- datalink, 0, dev->hard_header_len +
- datalink->header_length);
- rc = -EAGAIN;
- if (!intrfc)
- goto out_dev;
- /* Setup primary if necessary */
- if (idef->ipx_special == IPX_PRIMARY)
- ipx_primary_net = intrfc;
- if (!memcmp(idef->ipx_node, "\000\000\000\000\000\000",
- IPX_NODE_LEN)) {
- memset(intrfc->if_node, 0, IPX_NODE_LEN);
- memcpy(intrfc->if_node + IPX_NODE_LEN - dev->addr_len,
- dev->dev_addr, dev->addr_len);
- } else
- memcpy(intrfc->if_node, idef->ipx_node, IPX_NODE_LEN);
- ipxitf_hold(intrfc);
- ipxitf_insert(intrfc);
- }
-
-
- /* If the network number is known, add a route */
- rc = 0;
- if (!intrfc->if_netnum)
- goto out_intrfc;
-
- rc = ipxitf_add_local_route(intrfc);
-out_intrfc:
- ipxitf_put(intrfc);
- goto out;
-out_dev:
- dev_put(dev);
-out:
- return rc;
-}
-
-static int ipxitf_delete(struct ipx_interface_definition *idef)
-{
- struct net_device *dev = NULL;
- __be16 dlink_type = 0;
- struct ipx_interface *intrfc;
- int rc = 0;
-
- spin_lock_bh(&ipx_interfaces_lock);
- if (idef->ipx_special == IPX_INTERNAL) {
- if (ipx_internal_net) {
- __ipxitf_put(ipx_internal_net);
- goto out;
- }
- rc = -ENOENT;
- goto out;
- }
-
- dlink_type = ipx_map_frame_type(idef->ipx_dlink_type);
- rc = -EPROTONOSUPPORT;
- if (!dlink_type)
- goto out;
-
- dev = __dev_get_by_name(&init_net, idef->ipx_device);
- rc = -ENODEV;
- if (!dev)
- goto out;
-
- intrfc = __ipxitf_find_using_phys(dev, dlink_type);
- rc = -EINVAL;
- if (!intrfc)
- goto out;
- __ipxitf_put(intrfc);
-
- rc = 0;
-out:
- spin_unlock_bh(&ipx_interfaces_lock);
- return rc;
-}
-
-static struct ipx_interface *ipxitf_auto_create(struct net_device *dev,
- __be16 dlink_type)
-{
- struct ipx_interface *intrfc = NULL;
- struct datalink_proto *datalink;
-
- if (!dev)
- goto out;
-
- /* Check addresses are suitable */
- if (dev->addr_len > IPX_NODE_LEN)
- goto out;
-
- switch (ntohs(dlink_type)) {
- case ETH_P_IPX: datalink = pEII_datalink; break;
- case ETH_P_802_2: datalink = p8022_datalink; break;
- case ETH_P_SNAP: datalink = pSNAP_datalink; break;
- case ETH_P_802_3: datalink = p8023_datalink; break;
- default: goto out;
- }
-
- intrfc = ipxitf_alloc(dev, 0, dlink_type, datalink, 0,
- dev->hard_header_len + datalink->header_length);
-
- if (intrfc) {
- memset(intrfc->if_node, 0, IPX_NODE_LEN);
- memcpy((char *)&(intrfc->if_node[IPX_NODE_LEN-dev->addr_len]),
- dev->dev_addr, dev->addr_len);
- spin_lock_init(&intrfc->if_sklist_lock);
- refcount_set(&intrfc->refcnt, 1);
- ipxitf_insert(intrfc);
- dev_hold(dev);
- }
-
-out:
- return intrfc;
-}
-
-static int ipxitf_ioctl(unsigned int cmd, void __user *arg)
-{
- int rc = -EINVAL;
- struct ifreq ifr;
- int val;
-
- switch (cmd) {
- case SIOCSIFADDR: {
- struct sockaddr_ipx *sipx;
- struct ipx_interface_definition f;
-
- rc = -EFAULT;
- if (copy_from_user(&ifr, arg, sizeof(ifr)))
- break;
- sipx = (struct sockaddr_ipx *)&ifr.ifr_addr;
- rc = -EINVAL;
- if (sipx->sipx_family != AF_IPX)
- break;
- f.ipx_network = sipx->sipx_network;
- memcpy(f.ipx_device, ifr.ifr_name,
- sizeof(f.ipx_device));
- memcpy(f.ipx_node, sipx->sipx_node, IPX_NODE_LEN);
- f.ipx_dlink_type = sipx->sipx_type;
- f.ipx_special = sipx->sipx_special;
-
- if (sipx->sipx_action == IPX_DLTITF)
- rc = ipxitf_delete(&f);
- else
- rc = ipxitf_create(&f);
- break;
- }
- case SIOCGIFADDR: {
- struct sockaddr_ipx *sipx;
- struct ipx_interface *ipxif;
- struct net_device *dev;
-
- rc = -EFAULT;
- if (copy_from_user(&ifr, arg, sizeof(ifr)))
- break;
- sipx = (struct sockaddr_ipx *)&ifr.ifr_addr;
- dev = __dev_get_by_name(&init_net, ifr.ifr_name);
- rc = -ENODEV;
- if (!dev)
- break;
- ipxif = ipxitf_find_using_phys(dev,
- ipx_map_frame_type(sipx->sipx_type));
- rc = -EADDRNOTAVAIL;
- if (!ipxif)
- break;
-
- sipx->sipx_family = AF_IPX;
- sipx->sipx_network = ipxif->if_netnum;
- memcpy(sipx->sipx_node, ipxif->if_node,
- sizeof(sipx->sipx_node));
- rc = 0;
- if (copy_to_user(arg, &ifr, sizeof(ifr)))
- rc = -EFAULT;
- ipxitf_put(ipxif);
- break;
- }
- case SIOCAIPXITFCRT:
- rc = -EFAULT;
- if (get_user(val, (unsigned char __user *) arg))
- break;
- rc = 0;
- ipxcfg_auto_create_interfaces = val;
- break;
- case SIOCAIPXPRISLT:
- rc = -EFAULT;
- if (get_user(val, (unsigned char __user *) arg))
- break;
- rc = 0;
- ipxcfg_set_auto_select(val);
- break;
- }
-
- return rc;
-}
-
-/*
- * Checksum routine for IPX
- */
-
-/* Note: We assume ipx_tctrl==0 and htons(length)==ipx_pktsize */
-/* This functions should *not* mess with packet contents */
-
-__be16 ipx_cksum(struct ipxhdr *packet, int length)
-{
- /*
- * NOTE: sum is a net byte order quantity, which optimizes the
- * loop. This only works on big and little endian machines. (I
- * don't know of a machine that isn't.)
- */
- /* handle the first 3 words separately; checksum should be skipped
- * and ipx_tctrl masked out */
- __u16 *p = (__u16 *)packet;
- __u32 sum = p[1] + (p[2] & (__force u16)htons(0x00ff));
- __u32 i = (length >> 1) - 3; /* Number of remaining complete words */
-
- /* Loop through them */
- p += 3;
- while (i--)
- sum += *p++;
-
- /* Add on the last part word if it exists */
- if (packet->ipx_pktsize & htons(1))
- sum += (__force u16)htons(0xff00) & *p;
-
- /* Do final fixup */
- sum = (sum & 0xffff) + (sum >> 16);
-
- /* It's a pity there's no concept of carry in C */
- if (sum >= 0x10000)
- sum++;
-
- /*
- * Leave 0 alone; we don't want 0xffff here. Note that we can't get
- * here with 0x10000, so this check is the same as ((__u16)sum)
- */
- if (sum)
- sum = ~sum;
-
- return (__force __be16)sum;
-}
-
-const char *ipx_frame_name(__be16 frame)
-{
- char* rc = "None";
-
- switch (ntohs(frame)) {
- case ETH_P_IPX: rc = "EtherII"; break;
- case ETH_P_802_2: rc = "802.2"; break;
- case ETH_P_SNAP: rc = "SNAP"; break;
- case ETH_P_802_3: rc = "802.3"; break;
- }
-
- return rc;
-}
-
-const char *ipx_device_name(struct ipx_interface *intrfc)
-{
- return intrfc->if_internal ? "Internal" :
- intrfc->if_dev ? intrfc->if_dev->name : "Unknown";
-}
-
-/* Handling for system calls applied via the various interfaces to an IPX
- * socket object. */
-
-static int ipx_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- struct sock *sk = sock->sk;
- int opt;
- int rc = -EINVAL;
-
- lock_sock(sk);
- if (optlen != sizeof(int))
- goto out;
-
- rc = -EFAULT;
- if (get_user(opt, (unsigned int __user *)optval))
- goto out;
-
- rc = -ENOPROTOOPT;
- if (!(level == SOL_IPX && optname == IPX_TYPE))
- goto out;
-
- ipx_sk(sk)->type = opt;
- rc = 0;
-out:
- release_sock(sk);
- return rc;
-}
-
-static int ipx_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- struct sock *sk = sock->sk;
- int val = 0;
- int len;
- int rc = -ENOPROTOOPT;
-
- lock_sock(sk);
- if (!(level == SOL_IPX && optname == IPX_TYPE))
- goto out;
-
- val = ipx_sk(sk)->type;
-
- rc = -EFAULT;
- if (get_user(len, optlen))
- goto out;
-
- len = min_t(unsigned int, len, sizeof(int));
- rc = -EINVAL;
- if(len < 0)
- goto out;
-
- rc = -EFAULT;
- if (put_user(len, optlen) || copy_to_user(optval, &val, len))
- goto out;
-
- rc = 0;
-out:
- release_sock(sk);
- return rc;
-}
-
-static struct proto ipx_proto = {
- .name = "IPX",
- .owner = THIS_MODULE,
- .obj_size = sizeof(struct ipx_sock),
-};
-
-static int ipx_create(struct net *net, struct socket *sock, int protocol,
- int kern)
-{
- int rc = -ESOCKTNOSUPPORT;
- struct sock *sk;
-
- if (!net_eq(net, &init_net))
- return -EAFNOSUPPORT;
-
- /*
- * SPX support is not anymore in the kernel sources. If you want to
- * ressurrect it, completing it and making it understand shared skbs,
- * be fully multithreaded, etc, grab the sources in an early 2.5 kernel
- * tree.
- */
- if (sock->type != SOCK_DGRAM)
- goto out;
-
- rc = -ENOMEM;
- sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, kern);
- if (!sk)
- goto out;
-
- sk_refcnt_debug_inc(sk);
- sock_init_data(sock, sk);
- sk->sk_no_check_tx = 1; /* Checksum off by default */
- sock->ops = &ipx_dgram_ops;
- rc = 0;
-out:
- return rc;
-}
-
-static int ipx_release(struct socket *sock)
-{
- struct sock *sk = sock->sk;
-
- if (!sk)
- goto out;
-
- lock_sock(sk);
- sk->sk_shutdown = SHUTDOWN_MASK;
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
-
- sock_set_flag(sk, SOCK_DEAD);
- sock->sk = NULL;
- sk_refcnt_debug_release(sk);
- ipx_destroy_socket(sk);
- release_sock(sk);
- sock_put(sk);
-out:
- return 0;
-}
-
-/* caller must hold a reference to intrfc */
-
-static __be16 ipx_first_free_socketnum(struct ipx_interface *intrfc)
-{
- unsigned short socketNum = intrfc->if_sknum;
-
- spin_lock_bh(&intrfc->if_sklist_lock);
-
- if (socketNum < IPX_MIN_EPHEMERAL_SOCKET)
- socketNum = IPX_MIN_EPHEMERAL_SOCKET;
-
- while (__ipxitf_find_socket(intrfc, htons(socketNum)))
- if (socketNum > IPX_MAX_EPHEMERAL_SOCKET)
- socketNum = IPX_MIN_EPHEMERAL_SOCKET;
- else
- socketNum++;
-
- spin_unlock_bh(&intrfc->if_sklist_lock);
- intrfc->if_sknum = socketNum;
-
- return htons(socketNum);
-}
-
-static int __ipx_bind(struct socket *sock,
- struct sockaddr *uaddr, int addr_len)
-{
- struct sock *sk = sock->sk;
- struct ipx_sock *ipxs = ipx_sk(sk);
- struct ipx_interface *intrfc;
- struct sockaddr_ipx *addr = (struct sockaddr_ipx *)uaddr;
- int rc = -EINVAL;
-
- if (!sock_flag(sk, SOCK_ZAPPED) || addr_len != sizeof(struct sockaddr_ipx))
- goto out;
-
- intrfc = ipxitf_find_using_net(addr->sipx_network);
- rc = -EADDRNOTAVAIL;
- if (!intrfc)
- goto out;
-
- if (!addr->sipx_port) {
- addr->sipx_port = ipx_first_free_socketnum(intrfc);
- rc = -EINVAL;
- if (!addr->sipx_port)
- goto out_put;
- }
-
- /* protect IPX system stuff like routing/sap */
- rc = -EACCES;
- if (ntohs(addr->sipx_port) < IPX_MIN_EPHEMERAL_SOCKET &&
- !capable(CAP_NET_ADMIN))
- goto out_put;
-
- ipxs->port = addr->sipx_port;
-
-#ifdef CONFIG_IPX_INTERN
- if (intrfc == ipx_internal_net) {
- /* The source address is to be set explicitly if the
- * socket is to be bound on the internal network. If a
- * node number 0 was specified, the default is used.
- */
-
- rc = -EINVAL;
- if (!memcmp(addr->sipx_node, ipx_broadcast_node, IPX_NODE_LEN))
- goto out_put;
- if (!memcmp(addr->sipx_node, ipx_this_node, IPX_NODE_LEN))
- memcpy(ipxs->node, intrfc->if_node, IPX_NODE_LEN);
- else
- memcpy(ipxs->node, addr->sipx_node, IPX_NODE_LEN);
-
- rc = -EADDRINUSE;
- if (ipxitf_find_internal_socket(intrfc, ipxs->node,
- ipxs->port)) {
- SOCK_DEBUG(sk,
- "IPX: bind failed because port %X in use.\n",
- ntohs(addr->sipx_port));
- goto out_put;
- }
- } else {
- /* Source addresses are easy. It must be our
- * network:node pair for an interface routed to IPX
- * with the ipx routing ioctl()
- */
-
- memcpy(ipxs->node, intrfc->if_node, IPX_NODE_LEN);
-
- rc = -EADDRINUSE;
- if (ipxitf_find_socket(intrfc, addr->sipx_port)) {
- SOCK_DEBUG(sk,
- "IPX: bind failed because port %X in use.\n",
- ntohs(addr->sipx_port));
- goto out_put;
- }
- }
-
-#else /* !def CONFIG_IPX_INTERN */
-
- /* Source addresses are easy. It must be our network:node pair for
- an interface routed to IPX with the ipx routing ioctl() */
-
- rc = -EADDRINUSE;
- if (ipxitf_find_socket(intrfc, addr->sipx_port)) {
- SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n",
- ntohs((int)addr->sipx_port));
- goto out_put;
- }
-
-#endif /* CONFIG_IPX_INTERN */
-
- ipxitf_insert_socket(intrfc, sk);
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- rc = 0;
-out_put:
- ipxitf_put(intrfc);
-out:
- return rc;
-}
-
-static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
-{
- struct sock *sk = sock->sk;
- int rc;
-
- lock_sock(sk);
- rc = __ipx_bind(sock, uaddr, addr_len);
- release_sock(sk);
-
- return rc;
-}
-
-static int ipx_connect(struct socket *sock, struct sockaddr *uaddr,
- int addr_len, int flags)
-{
- struct sock *sk = sock->sk;
- struct ipx_sock *ipxs = ipx_sk(sk);
- struct sockaddr_ipx *addr;
- int rc = -EINVAL;
- struct ipx_route *rt;
-
- sk->sk_state = TCP_CLOSE;
- sock->state = SS_UNCONNECTED;
-
- lock_sock(sk);
- if (addr_len != sizeof(*addr))
- goto out;
- addr = (struct sockaddr_ipx *)uaddr;
-
- /* put the autobinding in */
- if (!ipxs->port) {
- struct sockaddr_ipx uaddr;
-
- uaddr.sipx_port = 0;
- uaddr.sipx_network = 0;
-
-#ifdef CONFIG_IPX_INTERN
- rc = -ENETDOWN;
- if (!ipxs->intrfc)
- goto out; /* Someone zonked the iface */
- memcpy(uaddr.sipx_node, ipxs->intrfc->if_node,
- IPX_NODE_LEN);
-#endif /* CONFIG_IPX_INTERN */
-
- rc = __ipx_bind(sock, (struct sockaddr *)&uaddr,
- sizeof(struct sockaddr_ipx));
- if (rc)
- goto out;
- }
-
- /* We can either connect to primary network or somewhere
- * we can route to */
- rt = ipxrtr_lookup(addr->sipx_network);
- rc = -ENETUNREACH;
- if (!rt && !(!addr->sipx_network && ipx_primary_net))
- goto out;
-
- ipxs->dest_addr.net = addr->sipx_network;
- ipxs->dest_addr.sock = addr->sipx_port;
- memcpy(ipxs->dest_addr.node, addr->sipx_node, IPX_NODE_LEN);
- ipxs->type = addr->sipx_type;
-
- if (sock->type == SOCK_DGRAM) {
- sock->state = SS_CONNECTED;
- sk->sk_state = TCP_ESTABLISHED;
- }
-
- if (rt)
- ipxrtr_put(rt);
- rc = 0;
-out:
- release_sock(sk);
- return rc;
-}
-
-
-static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
-{
- struct ipx_address *addr;
- struct sockaddr_ipx sipx;
- struct sock *sk = sock->sk;
- struct ipx_sock *ipxs = ipx_sk(sk);
- int rc;
-
- *uaddr_len = sizeof(struct sockaddr_ipx);
-
- lock_sock(sk);
- if (peer) {
- rc = -ENOTCONN;
- if (sk->sk_state != TCP_ESTABLISHED)
- goto out;
-
- addr = &ipxs->dest_addr;
- sipx.sipx_network = addr->net;
- sipx.sipx_port = addr->sock;
- memcpy(sipx.sipx_node, addr->node, IPX_NODE_LEN);
- } else {
- if (ipxs->intrfc) {
- sipx.sipx_network = ipxs->intrfc->if_netnum;
-#ifdef CONFIG_IPX_INTERN
- memcpy(sipx.sipx_node, ipxs->node, IPX_NODE_LEN);
-#else
- memcpy(sipx.sipx_node, ipxs->intrfc->if_node,
- IPX_NODE_LEN);
-#endif /* CONFIG_IPX_INTERN */
-
- } else {
- sipx.sipx_network = 0;
- memset(sipx.sipx_node, '\0', IPX_NODE_LEN);
- }
-
- sipx.sipx_port = ipxs->port;
- }
-
- sipx.sipx_family = AF_IPX;
- sipx.sipx_type = ipxs->type;
- sipx.sipx_zero = 0;
- memcpy(uaddr, &sipx, sizeof(sipx));
-
- rc = 0;
-out:
- release_sock(sk);
- return rc;
-}
-
-static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
-{
- /* NULL here for pt means the packet was looped back */
- struct ipx_interface *intrfc;
- struct ipxhdr *ipx;
- u16 ipx_pktsize;
- int rc = 0;
-
- if (!net_eq(dev_net(dev), &init_net))
- goto drop;
-
- /* Not ours */
- if (skb->pkt_type == PACKET_OTHERHOST)
- goto drop;
-
- if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
- goto out;
-
- if (!pskb_may_pull(skb, sizeof(struct ipxhdr)))
- goto drop;
-
- ipx_pktsize = ntohs(ipx_hdr(skb)->ipx_pktsize);
-
- /* Too small or invalid header? */
- if (ipx_pktsize < sizeof(struct ipxhdr) ||
- !pskb_may_pull(skb, ipx_pktsize))
- goto drop;
-
- ipx = ipx_hdr(skb);
- if (ipx->ipx_checksum != IPX_NO_CHECKSUM &&
- ipx->ipx_checksum != ipx_cksum(ipx, ipx_pktsize))
- goto drop;
-
- IPX_SKB_CB(skb)->ipx_tctrl = ipx->ipx_tctrl;
- IPX_SKB_CB(skb)->ipx_dest_net = ipx->ipx_dest.net;
- IPX_SKB_CB(skb)->ipx_source_net = ipx->ipx_source.net;
-
- /* Determine what local ipx endpoint this is */
- intrfc = ipxitf_find_using_phys(dev, pt->type);
- if (!intrfc) {
- if (ipxcfg_auto_create_interfaces &&
- IPX_SKB_CB(skb)->ipx_dest_net) {
- intrfc = ipxitf_auto_create(dev, pt->type);
- if (intrfc)
- ipxitf_hold(intrfc);
- }
-
- if (!intrfc) /* Not one of ours */
- /* or invalid packet for auto creation */
- goto drop;
- }
-
- rc = ipxitf_rcv(intrfc, skb);
- ipxitf_put(intrfc);
- goto out;
-drop:
- kfree_skb(skb);
-out:
- return rc;
-}
-
-static int ipx_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
-{
- struct sock *sk = sock->sk;
- struct ipx_sock *ipxs = ipx_sk(sk);
- DECLARE_SOCKADDR(struct sockaddr_ipx *, usipx, msg->msg_name);
- struct sockaddr_ipx local_sipx;
- int rc = -EINVAL;
- int flags = msg->msg_flags;
-
- lock_sock(sk);
- /* Socket gets bound below anyway */
-/* if (sk->sk_zapped)
- return -EIO; */ /* Socket not bound */
- if (flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
- goto out;
-
- /* Max possible packet size limited by 16 bit pktsize in header */
- if (len >= 65535 - sizeof(struct ipxhdr))
- goto out;
-
- if (usipx) {
- if (!ipxs->port) {
- struct sockaddr_ipx uaddr;
-
- uaddr.sipx_port = 0;
- uaddr.sipx_network = 0;
-#ifdef CONFIG_IPX_INTERN
- rc = -ENETDOWN;
- if (!ipxs->intrfc)
- goto out; /* Someone zonked the iface */
- memcpy(uaddr.sipx_node, ipxs->intrfc->if_node,
- IPX_NODE_LEN);
-#endif
- rc = __ipx_bind(sock, (struct sockaddr *)&uaddr,
- sizeof(struct sockaddr_ipx));
- if (rc)
- goto out;
- }
-
- rc = -EINVAL;
- if (msg->msg_namelen < sizeof(*usipx) ||
- usipx->sipx_family != AF_IPX)
- goto out;
- } else {
- rc = -ENOTCONN;
- if (sk->sk_state != TCP_ESTABLISHED)
- goto out;
-
- usipx = &local_sipx;
- usipx->sipx_family = AF_IPX;
- usipx->sipx_type = ipxs->type;
- usipx->sipx_port = ipxs->dest_addr.sock;
- usipx->sipx_network = ipxs->dest_addr.net;
- memcpy(usipx->sipx_node, ipxs->dest_addr.node, IPX_NODE_LEN);
- }
-
- rc = ipxrtr_route_packet(sk, usipx, msg, len, flags & MSG_DONTWAIT);
- if (rc >= 0)
- rc = len;
-out:
- release_sock(sk);
- return rc;
-}
-
-
-static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
- int flags)
-{
- struct sock *sk = sock->sk;
- struct ipx_sock *ipxs = ipx_sk(sk);
- DECLARE_SOCKADDR(struct sockaddr_ipx *, sipx, msg->msg_name);
- struct ipxhdr *ipx = NULL;
- struct sk_buff *skb;
- int copied, rc;
- bool locked = true;
-
- lock_sock(sk);
- /* put the autobinding in */
- if (!ipxs->port) {
- struct sockaddr_ipx uaddr;
-
- uaddr.sipx_port = 0;
- uaddr.sipx_network = 0;
-
-#ifdef CONFIG_IPX_INTERN
- rc = -ENETDOWN;
- if (!ipxs->intrfc)
- goto out; /* Someone zonked the iface */
- memcpy(uaddr.sipx_node, ipxs->intrfc->if_node, IPX_NODE_LEN);
-#endif /* CONFIG_IPX_INTERN */
-
- rc = __ipx_bind(sock, (struct sockaddr *)&uaddr,
- sizeof(struct sockaddr_ipx));
- if (rc)
- goto out;
- }
-
- rc = -ENOTCONN;
- if (sock_flag(sk, SOCK_ZAPPED))
- goto out;
-
- release_sock(sk);
- locked = false;
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &rc);
- if (!skb) {
- if (rc == -EAGAIN && (sk->sk_shutdown & RCV_SHUTDOWN))
- rc = 0;
- goto out;
- }
-
- ipx = ipx_hdr(skb);
- copied = ntohs(ipx->ipx_pktsize) - sizeof(struct ipxhdr);
- if (copied > size) {
- copied = size;
- msg->msg_flags |= MSG_TRUNC;
- }
-
- rc = skb_copy_datagram_msg(skb, sizeof(struct ipxhdr), msg, copied);
- if (rc)
- goto out_free;
- if (skb->tstamp)
- sk->sk_stamp = skb->tstamp;
-
- if (sipx) {
- sipx->sipx_family = AF_IPX;
- sipx->sipx_port = ipx->ipx_source.sock;
- memcpy(sipx->sipx_node, ipx->ipx_source.node, IPX_NODE_LEN);
- sipx->sipx_network = IPX_SKB_CB(skb)->ipx_source_net;
- sipx->sipx_type = ipx->ipx_type;
- sipx->sipx_zero = 0;
- msg->msg_namelen = sizeof(*sipx);
- }
- rc = copied;
-
-out_free:
- skb_free_datagram(sk, skb);
-out:
- if (locked)
- release_sock(sk);
- return rc;
-}
-
-
-static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
- int rc = 0;
- long amount = 0;
- struct sock *sk = sock->sk;
- void __user *argp = (void __user *)arg;
-
- lock_sock(sk);
- switch (cmd) {
- case TIOCOUTQ:
- amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
- if (amount < 0)
- amount = 0;
- rc = put_user(amount, (int __user *)argp);
- break;
- case TIOCINQ: {
- struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
- /* These two are safe on a single CPU system as only
- * user tasks fiddle here */
- if (skb)
- amount = skb->len - sizeof(struct ipxhdr);
- rc = put_user(amount, (int __user *)argp);
- break;
- }
- case SIOCADDRT:
- case SIOCDELRT:
- rc = -EPERM;
- if (capable(CAP_NET_ADMIN))
- rc = ipxrtr_ioctl(cmd, argp);
- break;
- case SIOCSIFADDR:
- case SIOCAIPXITFCRT:
- case SIOCAIPXPRISLT:
- rc = -EPERM;
- if (!capable(CAP_NET_ADMIN))
- break;
- /* fall through */
- case SIOCGIFADDR:
- rc = ipxitf_ioctl(cmd, argp);
- break;
- case SIOCIPXCFGDATA:
- rc = ipxcfg_get_config_data(argp);
- break;
- case SIOCIPXNCPCONN:
- /*
- * This socket wants to take care of the NCP connection
- * handed to us in arg.
- */
- rc = -EPERM;
- if (!capable(CAP_NET_ADMIN))
- break;
- rc = get_user(ipx_sk(sk)->ipx_ncp_conn,
- (const unsigned short __user *)argp);
- break;
- case SIOCGSTAMP:
- rc = sock_get_timestamp(sk, argp);
- break;
- case SIOCGIFDSTADDR:
- case SIOCSIFDSTADDR:
- case SIOCGIFBRDADDR:
- case SIOCSIFBRDADDR:
- case SIOCGIFNETMASK:
- case SIOCSIFNETMASK:
- rc = -EINVAL;
- break;
- default:
- rc = -ENOIOCTLCMD;
- break;
- }
- release_sock(sk);
-
- return rc;
-}
-
-
-#ifdef CONFIG_COMPAT
-static int ipx_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
- /*
- * These 4 commands use same structure on 32bit and 64bit. Rest of IPX
- * commands is handled by generic ioctl code. As these commands are
- * SIOCPROTOPRIVATE..SIOCPROTOPRIVATE+3, they cannot be handled by generic
- * code.
- */
- switch (cmd) {
- case SIOCAIPXITFCRT:
- case SIOCAIPXPRISLT:
- case SIOCIPXCFGDATA:
- case SIOCIPXNCPCONN:
- return ipx_ioctl(sock, cmd, arg);
- default:
- return -ENOIOCTLCMD;
- }
-}
-#endif
-
-static int ipx_shutdown(struct socket *sock, int mode)
-{
- struct sock *sk = sock->sk;
-
- if (mode < SHUT_RD || mode > SHUT_RDWR)
- return -EINVAL;
- /* This maps:
- * SHUT_RD (0) -> RCV_SHUTDOWN (1)
- * SHUT_WR (1) -> SEND_SHUTDOWN (2)
- * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
- */
- ++mode;
-
- lock_sock(sk);
- sk->sk_shutdown |= mode;
- release_sock(sk);
- sk->sk_state_change(sk);
-
- return 0;
-}
-
-/*
- * Socket family declarations
- */
-
-static const struct net_proto_family ipx_family_ops = {
- .family = PF_IPX,
- .create = ipx_create,
- .owner = THIS_MODULE,
-};
-
-static const struct proto_ops ipx_dgram_ops = {
- .family = PF_IPX,
- .owner = THIS_MODULE,
- .release = ipx_release,
- .bind = ipx_bind,
- .connect = ipx_connect,
- .socketpair = sock_no_socketpair,
- .accept = sock_no_accept,
- .getname = ipx_getname,
- .poll = datagram_poll,
- .ioctl = ipx_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ipx_compat_ioctl,
-#endif
- .listen = sock_no_listen,
- .shutdown = ipx_shutdown,
- .setsockopt = ipx_setsockopt,
- .getsockopt = ipx_getsockopt,
- .sendmsg = ipx_sendmsg,
- .recvmsg = ipx_recvmsg,
- .mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
-};
-
-static struct packet_type ipx_8023_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_802_3),
- .func = ipx_rcv,
-};
-
-static struct packet_type ipx_dix_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_IPX),
- .func = ipx_rcv,
-};
-
-static struct notifier_block ipx_dev_notifier = {
- .notifier_call = ipxitf_device_event,
-};
-
-static const unsigned char ipx_8022_type = 0xE0;
-static const unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 };
-static const char ipx_EII_err_msg[] __initconst =
- KERN_CRIT "IPX: Unable to register with Ethernet II\n";
-static const char ipx_8023_err_msg[] __initconst =
- KERN_CRIT "IPX: Unable to register with 802.3\n";
-static const char ipx_llc_err_msg[] __initconst =
- KERN_CRIT "IPX: Unable to register with 802.2\n";
-static const char ipx_snap_err_msg[] __initconst =
- KERN_CRIT "IPX: Unable to register with SNAP\n";
-
-static int __init ipx_init(void)
-{
- int rc = proto_register(&ipx_proto, 1);
-
- if (rc != 0)
- goto out;
-
- sock_register(&ipx_family_ops);
-
- pEII_datalink = make_EII_client();
- if (pEII_datalink)
- dev_add_pack(&ipx_dix_packet_type);
- else
- printk(ipx_EII_err_msg);
-
- p8023_datalink = make_8023_client();
- if (p8023_datalink)
- dev_add_pack(&ipx_8023_packet_type);
- else
- printk(ipx_8023_err_msg);
-
- p8022_datalink = register_8022_client(ipx_8022_type, ipx_rcv);
- if (!p8022_datalink)
- printk(ipx_llc_err_msg);
-
- pSNAP_datalink = register_snap_client(ipx_snap_id, ipx_rcv);
- if (!pSNAP_datalink)
- printk(ipx_snap_err_msg);
-
- register_netdevice_notifier(&ipx_dev_notifier);
- ipx_register_sysctl();
- ipx_proc_init();
-out:
- return rc;
-}
-
-static void __exit ipx_proto_finito(void)
-{
- ipx_proc_exit();
- ipx_unregister_sysctl();
-
- unregister_netdevice_notifier(&ipx_dev_notifier);
-
- ipxitf_cleanup();
-
- if (pSNAP_datalink) {
- unregister_snap_client(pSNAP_datalink);
- pSNAP_datalink = NULL;
- }
-
- if (p8022_datalink) {
- unregister_8022_client(p8022_datalink);
- p8022_datalink = NULL;
- }
-
- dev_remove_pack(&ipx_8023_packet_type);
- if (p8023_datalink) {
- destroy_8023_client(p8023_datalink);
- p8023_datalink = NULL;
- }
-
- dev_remove_pack(&ipx_dix_packet_type);
- if (pEII_datalink) {
- destroy_EII_client(pEII_datalink);
- pEII_datalink = NULL;
- }
-
- proto_unregister(&ipx_proto);
- sock_unregister(ipx_family_ops.family);
-}
-
-module_init(ipx_init);
-module_exit(ipx_proto_finito);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(PF_IPX);
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
deleted file mode 100644
index 38a3d51d9ead..000000000000
--- a/net/ipx/ipx_proc.c
+++ /dev/null
@@ -1,341 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * IPX proc routines
- *
- * Copyright(C) Arnaldo Carvalho de Melo <acme@conectiva.com.br>, 2002
- */
-
-#include <linux/init.h>
-#ifdef CONFIG_PROC_FS
-#include <linux/proc_fs.h>
-#include <linux/spinlock.h>
-#include <linux/seq_file.h>
-#include <linux/export.h>
-#include <net/net_namespace.h>
-#include <net/tcp_states.h>
-#include <net/ipx.h>
-
-static void *ipx_seq_interface_start(struct seq_file *seq, loff_t *pos)
-{
- spin_lock_bh(&ipx_interfaces_lock);
- return seq_list_start_head(&ipx_interfaces, *pos);
-}
-
-static void *ipx_seq_interface_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- return seq_list_next(v, &ipx_interfaces, pos);
-}
-
-static void ipx_seq_interface_stop(struct seq_file *seq, void *v)
-{
- spin_unlock_bh(&ipx_interfaces_lock);
-}
-
-static int ipx_seq_interface_show(struct seq_file *seq, void *v)
-{
- struct ipx_interface *i;
-
- if (v == &ipx_interfaces) {
- seq_puts(seq, "Network Node_Address Primary Device "
- "Frame_Type");
-#ifdef IPX_REFCNT_DEBUG
- seq_puts(seq, " refcnt");
-#endif
- seq_puts(seq, "\n");
- goto out;
- }
-
- i = list_entry(v, struct ipx_interface, node);
- seq_printf(seq, "%08X ", ntohl(i->if_netnum));
- seq_printf(seq, "%02X%02X%02X%02X%02X%02X ",
- i->if_node[0], i->if_node[1], i->if_node[2],
- i->if_node[3], i->if_node[4], i->if_node[5]);
- seq_printf(seq, "%-9s", i == ipx_primary_net ? "Yes" : "No");
- seq_printf(seq, "%-11s", ipx_device_name(i));
- seq_printf(seq, "%-9s", ipx_frame_name(i->if_dlink_type));
-#ifdef IPX_REFCNT_DEBUG
- seq_printf(seq, "%6d", refcount_read(&i->refcnt));
-#endif
- seq_puts(seq, "\n");
-out:
- return 0;
-}
-
-static void *ipx_seq_route_start(struct seq_file *seq, loff_t *pos)
-{
- read_lock_bh(&ipx_routes_lock);
- return seq_list_start_head(&ipx_routes, *pos);
-}
-
-static void *ipx_seq_route_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- return seq_list_next(v, &ipx_routes, pos);
-}
-
-static void ipx_seq_route_stop(struct seq_file *seq, void *v)
-{
- read_unlock_bh(&ipx_routes_lock);
-}
-
-static int ipx_seq_route_show(struct seq_file *seq, void *v)
-{
- struct ipx_route *rt;
-
- if (v == &ipx_routes) {
- seq_puts(seq, "Network Router_Net Router_Node\n");
- goto out;
- }
-
- rt = list_entry(v, struct ipx_route, node);
-
- seq_printf(seq, "%08X ", ntohl(rt->ir_net));
- if (rt->ir_routed)
- seq_printf(seq, "%08X %02X%02X%02X%02X%02X%02X\n",
- ntohl(rt->ir_intrfc->if_netnum),
- rt->ir_router_node[0], rt->ir_router_node[1],
- rt->ir_router_node[2], rt->ir_router_node[3],
- rt->ir_router_node[4], rt->ir_router_node[5]);
- else
- seq_puts(seq, "Directly Connected\n");
-out:
- return 0;
-}
-
-static __inline__ struct sock *ipx_get_socket_idx(loff_t pos)
-{
- struct sock *s = NULL;
- struct ipx_interface *i;
-
- list_for_each_entry(i, &ipx_interfaces, node) {
- spin_lock_bh(&i->if_sklist_lock);
- sk_for_each(s, &i->if_sklist) {
- if (!pos)
- break;
- --pos;
- }
- spin_unlock_bh(&i->if_sklist_lock);
- if (!pos) {
- if (s)
- goto found;
- break;
- }
- }
- s = NULL;
-found:
- return s;
-}
-
-static void *ipx_seq_socket_start(struct seq_file *seq, loff_t *pos)
-{
- loff_t l = *pos;
-
- spin_lock_bh(&ipx_interfaces_lock);
- return l ? ipx_get_socket_idx(--l) : SEQ_START_TOKEN;
-}
-
-static void *ipx_seq_socket_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct sock* sk, *next;
- struct ipx_interface *i;
- struct ipx_sock *ipxs;
-
- ++*pos;
- if (v == SEQ_START_TOKEN) {
- sk = NULL;
- i = ipx_interfaces_head();
- if (!i)
- goto out;
- sk = sk_head(&i->if_sklist);
- if (sk)
- spin_lock_bh(&i->if_sklist_lock);
- goto out;
- }
- sk = v;
- next = sk_next(sk);
- if (next) {
- sk = next;
- goto out;
- }
- ipxs = ipx_sk(sk);
- i = ipxs->intrfc;
- spin_unlock_bh(&i->if_sklist_lock);
- sk = NULL;
- for (;;) {
- if (i->node.next == &ipx_interfaces)
- break;
- i = list_entry(i->node.next, struct ipx_interface, node);
- spin_lock_bh(&i->if_sklist_lock);
- if (!hlist_empty(&i->if_sklist)) {
- sk = sk_head(&i->if_sklist);
- break;
- }
- spin_unlock_bh(&i->if_sklist_lock);
- }
-out:
- return sk;
-}
-
-static int ipx_seq_socket_show(struct seq_file *seq, void *v)
-{
- struct sock *s;
- struct ipx_sock *ipxs;
-
- if (v == SEQ_START_TOKEN) {
-#ifdef CONFIG_IPX_INTERN
- seq_puts(seq, "Local_Address "
- "Remote_Address Tx_Queue "
- "Rx_Queue State Uid\n");
-#else
- seq_puts(seq, "Local_Address Remote_Address "
- "Tx_Queue Rx_Queue State Uid\n");
-#endif
- goto out;
- }
-
- s = v;
- ipxs = ipx_sk(s);
-#ifdef CONFIG_IPX_INTERN
- seq_printf(seq, "%08X:%02X%02X%02X%02X%02X%02X:%04X ",
- ntohl(ipxs->intrfc->if_netnum),
- ipxs->node[0], ipxs->node[1], ipxs->node[2], ipxs->node[3],
- ipxs->node[4], ipxs->node[5], ntohs(ipxs->port));
-#else
- seq_printf(seq, "%08X:%04X ", ntohl(ipxs->intrfc->if_netnum),
- ntohs(ipxs->port));
-#endif /* CONFIG_IPX_INTERN */
- if (s->sk_state != TCP_ESTABLISHED)
- seq_printf(seq, "%-28s", "Not_Connected");
- else {
- seq_printf(seq, "%08X:%02X%02X%02X%02X%02X%02X:%04X ",
- ntohl(ipxs->dest_addr.net),
- ipxs->dest_addr.node[0], ipxs->dest_addr.node[1],
- ipxs->dest_addr.node[2], ipxs->dest_addr.node[3],
- ipxs->dest_addr.node[4], ipxs->dest_addr.node[5],
- ntohs(ipxs->dest_addr.sock));
- }
-
- seq_printf(seq, "%08X %08X %02X %03u\n",
- sk_wmem_alloc_get(s),
- sk_rmem_alloc_get(s),
- s->sk_state,
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)));
-out:
- return 0;
-}
-
-static const struct seq_operations ipx_seq_interface_ops = {
- .start = ipx_seq_interface_start,
- .next = ipx_seq_interface_next,
- .stop = ipx_seq_interface_stop,
- .show = ipx_seq_interface_show,
-};
-
-static const struct seq_operations ipx_seq_route_ops = {
- .start = ipx_seq_route_start,
- .next = ipx_seq_route_next,
- .stop = ipx_seq_route_stop,
- .show = ipx_seq_route_show,
-};
-
-static const struct seq_operations ipx_seq_socket_ops = {
- .start = ipx_seq_socket_start,
- .next = ipx_seq_socket_next,
- .stop = ipx_seq_interface_stop,
- .show = ipx_seq_socket_show,
-};
-
-static int ipx_seq_route_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ipx_seq_route_ops);
-}
-
-static int ipx_seq_interface_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ipx_seq_interface_ops);
-}
-
-static int ipx_seq_socket_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ipx_seq_socket_ops);
-}
-
-static const struct file_operations ipx_seq_interface_fops = {
- .owner = THIS_MODULE,
- .open = ipx_seq_interface_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations ipx_seq_route_fops = {
- .owner = THIS_MODULE,
- .open = ipx_seq_route_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const struct file_operations ipx_seq_socket_fops = {
- .owner = THIS_MODULE,
- .open = ipx_seq_socket_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static struct proc_dir_entry *ipx_proc_dir;
-
-int __init ipx_proc_init(void)
-{
- struct proc_dir_entry *p;
- int rc = -ENOMEM;
-
- ipx_proc_dir = proc_mkdir("ipx", init_net.proc_net);
-
- if (!ipx_proc_dir)
- goto out;
- p = proc_create("interface", S_IRUGO,
- ipx_proc_dir, &ipx_seq_interface_fops);
- if (!p)
- goto out_interface;
-
- p = proc_create("route", S_IRUGO, ipx_proc_dir, &ipx_seq_route_fops);
- if (!p)
- goto out_route;
-
- p = proc_create("socket", S_IRUGO, ipx_proc_dir, &ipx_seq_socket_fops);
- if (!p)
- goto out_socket;
-
- rc = 0;
-out:
- return rc;
-out_socket:
- remove_proc_entry("route", ipx_proc_dir);
-out_route:
- remove_proc_entry("interface", ipx_proc_dir);
-out_interface:
- remove_proc_entry("ipx", init_net.proc_net);
- goto out;
-}
-
-void __exit ipx_proc_exit(void)
-{
- remove_proc_entry("interface", ipx_proc_dir);
- remove_proc_entry("route", ipx_proc_dir);
- remove_proc_entry("socket", ipx_proc_dir);
- remove_proc_entry("ipx", init_net.proc_net);
-}
-
-#else /* CONFIG_PROC_FS */
-
-int __init ipx_proc_init(void)
-{
- return 0;
-}
-
-void __exit ipx_proc_exit(void)
-{
-}
-
-#endif /* CONFIG_PROC_FS */
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
deleted file mode 100644
index 3cf93aa9f284..000000000000
--- a/net/ipx/ipx_route.c
+++ /dev/null
@@ -1,293 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Implements the IPX routing routines.
- * Code moved from af_ipx.c.
- *
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>, 2003
- *
- * See net/ipx/ChangeLog.
- */
-
-#include <linux/list.h>
-#include <linux/route.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-
-#include <net/ipx.h>
-#include <net/sock.h>
-
-LIST_HEAD(ipx_routes);
-DEFINE_RWLOCK(ipx_routes_lock);
-
-extern struct ipx_interface *ipx_internal_net;
-
-extern struct ipx_interface *ipxitf_find_using_net(__be32 net);
-extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
- struct sk_buff *skb, int copy);
-extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
- struct sk_buff *skb, int copy);
-
-struct ipx_route *ipxrtr_lookup(__be32 net)
-{
- struct ipx_route *r;
-
- read_lock_bh(&ipx_routes_lock);
- list_for_each_entry(r, &ipx_routes, node)
- if (r->ir_net == net) {
- ipxrtr_hold(r);
- goto unlock;
- }
- r = NULL;
-unlock:
- read_unlock_bh(&ipx_routes_lock);
- return r;
-}
-
-/*
- * Caller must hold a reference to intrfc
- */
-int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc,
- unsigned char *node)
-{
- struct ipx_route *rt;
- int rc;
-
- /* Get a route structure; either existing or create */
- rt = ipxrtr_lookup(network);
- if (!rt) {
- rt = kmalloc(sizeof(*rt), GFP_ATOMIC);
- rc = -EAGAIN;
- if (!rt)
- goto out;
-
- refcount_set(&rt->refcnt, 1);
- ipxrtr_hold(rt);
- write_lock_bh(&ipx_routes_lock);
- list_add(&rt->node, &ipx_routes);
- write_unlock_bh(&ipx_routes_lock);
- } else {
- rc = -EEXIST;
- if (intrfc == ipx_internal_net)
- goto out_put;
- }
-
- rt->ir_net = network;
- rt->ir_intrfc = intrfc;
- if (!node) {
- memset(rt->ir_router_node, '\0', IPX_NODE_LEN);
- rt->ir_routed = 0;
- } else {
- memcpy(rt->ir_router_node, node, IPX_NODE_LEN);
- rt->ir_routed = 1;
- }
-
- rc = 0;
-out_put:
- ipxrtr_put(rt);
-out:
- return rc;
-}
-
-void ipxrtr_del_routes(struct ipx_interface *intrfc)
-{
- struct ipx_route *r, *tmp;
-
- write_lock_bh(&ipx_routes_lock);
- list_for_each_entry_safe(r, tmp, &ipx_routes, node)
- if (r->ir_intrfc == intrfc) {
- list_del(&r->node);
- ipxrtr_put(r);
- }
- write_unlock_bh(&ipx_routes_lock);
-}
-
-static int ipxrtr_create(struct ipx_route_definition *rd)
-{
- struct ipx_interface *intrfc;
- int rc = -ENETUNREACH;
-
- /* Find the appropriate interface */
- intrfc = ipxitf_find_using_net(rd->ipx_router_network);
- if (!intrfc)
- goto out;
- rc = ipxrtr_add_route(rd->ipx_network, intrfc, rd->ipx_router_node);
- ipxitf_put(intrfc);
-out:
- return rc;
-}
-
-static int ipxrtr_delete(__be32 net)
-{
- struct ipx_route *r, *tmp;
- int rc;
-
- write_lock_bh(&ipx_routes_lock);
- list_for_each_entry_safe(r, tmp, &ipx_routes, node)
- if (r->ir_net == net) {
- /* Directly connected; can't lose route */
- rc = -EPERM;
- if (!r->ir_routed)
- goto out;
- list_del(&r->node);
- ipxrtr_put(r);
- rc = 0;
- goto out;
- }
- rc = -ENOENT;
-out:
- write_unlock_bh(&ipx_routes_lock);
- return rc;
-}
-
-/*
- * The skb has to be unshared, we'll end up calling ipxitf_send, that'll
- * modify the packet
- */
-int ipxrtr_route_skb(struct sk_buff *skb)
-{
- struct ipxhdr *ipx = ipx_hdr(skb);
- struct ipx_route *r = ipxrtr_lookup(IPX_SKB_CB(skb)->ipx_dest_net);
-
- if (!r) { /* no known route */
- kfree_skb(skb);
- return 0;
- }
-
- ipxitf_hold(r->ir_intrfc);
- ipxitf_send(r->ir_intrfc, skb, r->ir_routed ?
- r->ir_router_node : ipx->ipx_dest.node);
- ipxitf_put(r->ir_intrfc);
- ipxrtr_put(r);
-
- return 0;
-}
-
-/*
- * Route an outgoing frame from a socket.
- */
-int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
- struct msghdr *msg, size_t len, int noblock)
-{
- struct sk_buff *skb;
- struct ipx_sock *ipxs = ipx_sk(sk);
- struct ipx_interface *intrfc;
- struct ipxhdr *ipx;
- size_t size;
- int ipx_offset;
- struct ipx_route *rt = NULL;
- int rc;
-
- /* Find the appropriate interface on which to send packet */
- if (!usipx->sipx_network && ipx_primary_net) {
- usipx->sipx_network = ipx_primary_net->if_netnum;
- intrfc = ipx_primary_net;
- } else {
- rt = ipxrtr_lookup(usipx->sipx_network);
- rc = -ENETUNREACH;
- if (!rt)
- goto out;
- intrfc = rt->ir_intrfc;
- }
-
- ipxitf_hold(intrfc);
- ipx_offset = intrfc->if_ipx_offset;
- size = sizeof(struct ipxhdr) + len + ipx_offset;
-
- skb = sock_alloc_send_skb(sk, size, noblock, &rc);
- if (!skb)
- goto out_put;
-
- skb_reserve(skb, ipx_offset);
- skb->sk = sk;
-
- /* Fill in IPX header */
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
- skb_put(skb, sizeof(struct ipxhdr));
- ipx = ipx_hdr(skb);
- ipx->ipx_pktsize = htons(len + sizeof(struct ipxhdr));
- IPX_SKB_CB(skb)->ipx_tctrl = 0;
- ipx->ipx_type = usipx->sipx_type;
-
- IPX_SKB_CB(skb)->last_hop.index = -1;
-#ifdef CONFIG_IPX_INTERN
- IPX_SKB_CB(skb)->ipx_source_net = ipxs->intrfc->if_netnum;
- memcpy(ipx->ipx_source.node, ipxs->node, IPX_NODE_LEN);
-#else
- rc = ntohs(ipxs->port);
- if (rc == 0x453 || rc == 0x452) {
- /* RIP/SAP special handling for mars_nwe */
- IPX_SKB_CB(skb)->ipx_source_net = intrfc->if_netnum;
- memcpy(ipx->ipx_source.node, intrfc->if_node, IPX_NODE_LEN);
- } else {
- IPX_SKB_CB(skb)->ipx_source_net = ipxs->intrfc->if_netnum;
- memcpy(ipx->ipx_source.node, ipxs->intrfc->if_node,
- IPX_NODE_LEN);
- }
-#endif /* CONFIG_IPX_INTERN */
- ipx->ipx_source.sock = ipxs->port;
- IPX_SKB_CB(skb)->ipx_dest_net = usipx->sipx_network;
- memcpy(ipx->ipx_dest.node, usipx->sipx_node, IPX_NODE_LEN);
- ipx->ipx_dest.sock = usipx->sipx_port;
-
- rc = memcpy_from_msg(skb_put(skb, len), msg, len);
- if (rc) {
- kfree_skb(skb);
- goto out_put;
- }
-
- /* Apply checksum. Not allowed on 802.3 links. */
- if (sk->sk_no_check_tx ||
- intrfc->if_dlink_type == htons(IPX_FRAME_8023))
- ipx->ipx_checksum = htons(0xFFFF);
- else
- ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr));
-
- rc = ipxitf_send(intrfc, skb, (rt && rt->ir_routed) ?
- rt->ir_router_node : ipx->ipx_dest.node);
-out_put:
- ipxitf_put(intrfc);
- if (rt)
- ipxrtr_put(rt);
-out:
- return rc;
-}
-
-/*
- * We use a normal struct rtentry for route handling
- */
-int ipxrtr_ioctl(unsigned int cmd, void __user *arg)
-{
- struct rtentry rt; /* Use these to behave like 'other' stacks */
- struct sockaddr_ipx *sg, *st;
- int rc = -EFAULT;
-
- if (copy_from_user(&rt, arg, sizeof(rt)))
- goto out;
-
- sg = (struct sockaddr_ipx *)&rt.rt_gateway;
- st = (struct sockaddr_ipx *)&rt.rt_dst;
-
- rc = -EINVAL;
- if (!(rt.rt_flags & RTF_GATEWAY) || /* Direct routes are fixed */
- sg->sipx_family != AF_IPX ||
- st->sipx_family != AF_IPX)
- goto out;
-
- switch (cmd) {
- case SIOCDELRT:
- rc = ipxrtr_delete(st->sipx_network);
- break;
- case SIOCADDRT: {
- struct ipx_route_definition f;
- f.ipx_network = st->sipx_network;
- f.ipx_router_network = sg->sipx_network;
- memcpy(f.ipx_router_node, sg->sipx_node, IPX_NODE_LEN);
- rc = ipxrtr_create(&f);
- break;
- }
- }
-
-out:
- return rc;
-}
diff --git a/net/ipx/pe2.c b/net/ipx/pe2.c
deleted file mode 100644
index ba7d4214bbff..000000000000
--- a/net/ipx/pe2.c
+++ /dev/null
@@ -1,36 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/in.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-
-#include <net/datalink.h>
-
-static int pEII_request(struct datalink_proto *dl,
- struct sk_buff *skb, unsigned char *dest_node)
-{
- struct net_device *dev = skb->dev;
-
- skb->protocol = htons(ETH_P_IPX);
- dev_hard_header(skb, dev, ETH_P_IPX, dest_node, NULL, skb->len);
- return dev_queue_xmit(skb);
-}
-
-struct datalink_proto *make_EII_client(void)
-{
- struct datalink_proto *proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
-
- if (proto) {
- proto->header_length = 0;
- proto->request = pEII_request;
- }
-
- return proto;
-}
-
-void destroy_EII_client(struct datalink_proto *dl)
-{
- kfree(dl);
-}
diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c
deleted file mode 100644
index c3eef457db88..000000000000
--- a/net/ipx/sysctl_net_ipx.c
+++ /dev/null
@@ -1,40 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* -*- linux-c -*-
- * sysctl_net_ipx.c: sysctl interface to net IPX subsystem.
- *
- * Begun April 1, 1996, Mike Shaver.
- * Added /proc/sys/net/ipx directory entry (empty =) ). [MS]
- * Added /proc/sys/net/ipx/ipx_pprop_broadcasting - acme March 4, 2001
- */
-
-#include <linux/mm.h>
-#include <linux/sysctl.h>
-#include <net/net_namespace.h>
-#include <net/ipx.h>
-
-#ifndef CONFIG_SYSCTL
-#error This file should not be compiled without CONFIG_SYSCTL defined
-#endif
-
-static struct ctl_table ipx_table[] = {
- {
- .procname = "ipx_pprop_broadcasting",
- .data = &sysctl_ipx_pprop_broadcasting,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- { },
-};
-
-static struct ctl_table_header *ipx_table_header;
-
-void ipx_register_sysctl(void)
-{
- ipx_table_header = register_net_sysctl(&init_net, "net/ipx", ipx_table);
-}
-
-void ipx_unregister_sysctl(void)
-{
- unregister_net_sysctl_table(ipx_table_header);
-}
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 148533169b1d..64331158d693 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1474,7 +1474,7 @@ done:
return copied;
}
-static inline unsigned int iucv_accept_poll(struct sock *parent)
+static inline __poll_t iucv_accept_poll(struct sock *parent)
{
struct iucv_sock *isk, *n;
struct sock *sk;
@@ -1489,11 +1489,11 @@ static inline unsigned int iucv_accept_poll(struct sock *parent)
return 0;
}
-unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
+__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask = 0;
+ __poll_t mask = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index bd5723315069..9d5649e4e8b7 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -247,7 +247,6 @@ static int kcm_seq_show(struct seq_file *seq, void *v)
}
static const struct file_operations kcm_seq_fops = {
- .owner = THIS_MODULE,
.open = kcm_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -397,7 +396,6 @@ static int kcm_stats_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations kcm_stats_seq_fops = {
- .owner = THIS_MODULE,
.open = kcm_stats_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 0b750a22c4b9..4a8d407f8902 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1387,8 +1387,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
if (!csk)
return -EINVAL;
- /* We must prevent loops or risk deadlock ! */
- if (csk->sk_family == PF_KCM)
+ /* Only allow TCP sockets to be attached for now */
+ if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
+ csk->sk_protocol != IPPROTO_TCP)
+ return -EOPNOTSUPP;
+
+ /* Don't allow listeners or closed sockets */
+ if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE)
return -EOPNOTSUPP;
psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
@@ -1405,9 +1410,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
return err;
}
- sock_hold(csk);
-
write_lock_bh(&csk->sk_callback_lock);
+
+ /* Check if sk_user_data is aready by KCM or someone else.
+ * Must be done under lock to prevent race conditions.
+ */
+ if (csk->sk_user_data) {
+ write_unlock_bh(&csk->sk_callback_lock);
+ strp_done(&psock->strp);
+ kmem_cache_free(kcm_psockp, psock);
+ return -EALREADY;
+ }
+
psock->save_data_ready = csk->sk_data_ready;
psock->save_write_space = csk->sk_write_space;
psock->save_state_change = csk->sk_state_change;
@@ -1415,8 +1429,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
csk->sk_data_ready = psock_data_ready;
csk->sk_write_space = psock_write_space;
csk->sk_state_change = psock_state_change;
+
write_unlock_bh(&csk->sk_callback_lock);
+ sock_hold(csk);
+
/* Finished initialization, now add the psock to the MUX. */
spin_lock_bh(&mux->lock);
head = &mux->psocks;
@@ -1625,60 +1642,30 @@ static struct proto kcm_proto = {
};
/* Clone a kcm socket. */
-static int kcm_clone(struct socket *osock, struct kcm_clone *info,
- struct socket **newsockp)
+static struct file *kcm_clone(struct socket *osock)
{
struct socket *newsock;
struct sock *newsk;
- struct file *newfile;
- int err, newfd;
- err = -ENFILE;
newsock = sock_alloc();
if (!newsock)
- goto out;
+ return ERR_PTR(-ENFILE);
newsock->type = osock->type;
newsock->ops = osock->ops;
__module_get(newsock->ops->owner);
- newfd = get_unused_fd_flags(0);
- if (unlikely(newfd < 0)) {
- err = newfd;
- goto out_fd_fail;
- }
-
- newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
- if (IS_ERR(newfile)) {
- err = PTR_ERR(newfile);
- goto out_sock_alloc_fail;
- }
-
newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
&kcm_proto, true);
if (!newsk) {
- err = -ENOMEM;
- goto out_sk_alloc_fail;
+ sock_release(newsock);
+ return ERR_PTR(-ENOMEM);
}
-
sock_init_data(newsock, newsk);
init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
- fd_install(newfd, newfile);
- *newsockp = newsock;
- info->fd = newfd;
-
- return 0;
-
-out_sk_alloc_fail:
- fput(newfile);
-out_sock_alloc_fail:
- put_unused_fd(newfd);
-out_fd_fail:
- sock_release(newsock);
-out:
- return err;
+ return sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
}
static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
@@ -1708,17 +1695,25 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
}
case SIOCKCMCLONE: {
struct kcm_clone info;
- struct socket *newsock = NULL;
-
- err = kcm_clone(sock, &info, &newsock);
- if (!err) {
- if (copy_to_user((void __user *)arg, &info,
- sizeof(info))) {
- err = -EFAULT;
- sys_close(info.fd);
- }
- }
+ struct file *file;
+ info.fd = get_unused_fd_flags(0);
+ if (unlikely(info.fd < 0))
+ return info.fd;
+
+ file = kcm_clone(sock);
+ if (IS_ERR(file)) {
+ put_unused_fd(info.fd);
+ return PTR_ERR(file);
+ }
+ if (copy_to_user((void __user *)arg, &info,
+ sizeof(info))) {
+ put_unused_fd(info.fd);
+ fput(file);
+ return -EFAULT;
+ }
+ fd_install(info.fd, file);
+ err = 0;
break;
}
default:
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 3dffb892d52c..7e2e7188e7f4 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -401,6 +401,11 @@ static int verify_address_len(const void *p)
#endif
int len;
+ if (sp->sadb_address_len <
+ DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family),
+ sizeof(uint64_t)))
+ return -EINVAL;
+
switch (addr->sa_family) {
case AF_INET:
len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t));
@@ -511,6 +516,9 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void *
uint16_t ext_type;
int ext_len;
+ if (len < sizeof(*ehdr))
+ return -EINVAL;
+
ext_len = ehdr->sadb_ext_len;
ext_len *= sizeof(uint64_t);
ext_type = ehdr->sadb_ext_type;
@@ -2194,8 +2202,10 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev
return PTR_ERR(out_skb);
err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
- if (err < 0)
+ if (err < 0) {
+ kfree_skb(out_skb);
return err;
+ }
out_hdr = (struct sadb_msg *) out_skb->data;
out_hdr->sadb_msg_version = PF_KEY_V2;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 115918ad8eca..194a7483bb93 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -662,10 +662,9 @@ discard:
* |x|S|x|x|x|x|x|x| Sequence Number |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
- * Cookie value, sublayer format and offset (pad) are negotiated with
- * the peer when the session is set up. Unlike L2TPv2, we do not need
- * to parse the packet header to determine if optional fields are
- * present.
+ * Cookie value and sublayer format are negotiated with the peer when
+ * the session is set up. Unlike L2TPv2, we do not need to parse the
+ * packet header to determine if optional fields are present.
*
* Caller must already have parsed the frame and determined that it is
* a data (not control) frame before coming here. Fields up to the
@@ -731,11 +730,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
"%s: recv data ns=%u, session nr=%u\n",
session->name, ns, session->nr);
}
+ ptr += 4;
}
- /* Advance past L2-specific header, if present */
- ptr += session->l2specific_len;
-
if (L2TP_SKB_CB(skb)->has_seq) {
/* Received a packet with sequence numbers. If we're the LNS,
* check if we sre sending sequence numbers and if not,
@@ -780,10 +777,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
}
}
- /* Session data offset is handled differently for L2TPv2 and
- * L2TPv3. For L2TPv2, there is an optional 16-bit value in
- * the header. For L2TPv3, the offset is negotiated using AVPs
- * in the session setup control protocol.
+ /* Session data offset is defined only for L2TPv2 and is
+ * indicated by an optional 16-bit value in the header.
*/
if (tunnel->version == L2TP_HDR_VER_2) {
/* If offset bit set, skip it. */
@@ -791,8 +786,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
offset = ntohs(*(__be16 *)ptr);
ptr += 2 + offset;
}
- } else
- ptr += session->offset;
+ }
offset = ptr - optr;
if (!pskb_may_pull(skb, offset))
@@ -1052,24 +1046,21 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
memcpy(bufp, &session->cookie[0], session->cookie_len);
bufp += session->cookie_len;
}
- if (session->l2specific_len) {
- if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
- u32 l2h = 0;
- if (session->send_seq) {
- l2h = 0x40000000 | session->ns;
- session->ns++;
- session->ns &= 0xffffff;
- l2tp_dbg(session, L2TP_MSG_SEQ,
- "%s: updated ns to %u\n",
- session->name, session->ns);
- }
+ if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
+ u32 l2h = 0;
- *((__be32 *) bufp) = htonl(l2h);
+ if (session->send_seq) {
+ l2h = 0x40000000 | session->ns;
+ session->ns++;
+ session->ns &= 0xffffff;
+ l2tp_dbg(session, L2TP_MSG_SEQ,
+ "%s: updated ns to %u\n",
+ session->name, session->ns);
}
- bufp += session->l2specific_len;
+
+ *((__be32 *)bufp) = htonl(l2h);
+ bufp += 4;
}
- if (session->offset)
- bufp += session->offset;
return bufp - optr;
}
@@ -1725,7 +1716,7 @@ int l2tp_session_delete(struct l2tp_session *session)
EXPORT_SYMBOL_GPL(l2tp_session_delete);
/* We come here whenever a session's send_seq, cookie_len or
- * l2specific_len parameters are set.
+ * l2specific_type parameters are set.
*/
void l2tp_session_set_header_len(struct l2tp_session *session, int version)
{
@@ -1734,7 +1725,8 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
if (session->send_seq)
session->hdr_len += 4;
} else {
- session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset;
+ session->hdr_len = 4 + session->cookie_len;
+ session->hdr_len += l2tp_get_l2specific_len(session);
if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
session->hdr_len += 4;
}
@@ -1784,9 +1776,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
session->recv_seq = cfg->recv_seq;
session->lns_mode = cfg->lns_mode;
session->reorder_timeout = cfg->reorder_timeout;
- session->offset = cfg->offset;
session->l2specific_type = cfg->l2specific_type;
- session->l2specific_len = cfg->l2specific_len;
session->cookie_len = cfg->cookie_len;
memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len);
session->peer_cookie_len = cfg->peer_cookie_len;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 9534e16965cc..9bbee90e9963 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -59,8 +59,6 @@ struct l2tp_session_cfg {
int debug; /* bitmask of debug message
* categories */
u16 vlan_id; /* VLAN pseudowire only */
- u16 offset; /* offset to payload */
- u16 l2specific_len; /* Layer 2 specific length */
u16 l2specific_type; /* Layer 2 specific type */
u8 cookie[8]; /* optional cookie */
int cookie_len; /* 0, 4 or 8 bytes */
@@ -86,9 +84,6 @@ struct l2tp_session {
int cookie_len;
u8 peer_cookie[8];
int peer_cookie_len;
- u16 offset; /* offset from end of L2TP header
- to beginning of data */
- u16 l2specific_len;
u16 l2specific_type;
u16 hdr_len;
u32 nr; /* session NR state (receive) */
@@ -305,6 +300,17 @@ static inline void l2tp_session_dec_refcount(struct l2tp_session *session)
l2tp_session_free(session);
}
+static inline int l2tp_get_l2specific_len(struct l2tp_session *session)
+{
+ switch (session->l2specific_type) {
+ case L2TP_L2SPECTYPE_DEFAULT:
+ return 4;
+ case L2TP_L2SPECTYPE_NONE:
+ default:
+ return 0;
+ }
+}
+
#define l2tp_printk(ptr, type, func, fmt, ...) \
do { \
if (((ptr)->debug) & (type)) \
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index eb69411bcb47..72e713da4733 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -180,8 +180,8 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
session->lns_mode ? "LNS" : "LAC",
session->debug,
jiffies_to_msecs(session->reorder_timeout));
- seq_printf(m, " offset %hu l2specific %hu/%hu\n",
- session->offset, session->l2specific_type, session->l2specific_len);
+ seq_printf(m, " offset 0 l2specific %hu/%hu\n",
+ session->l2specific_type, l2tp_get_l2specific_len(session));
if (session->cookie_len) {
seq_printf(m, " cookie %02x%02x%02x%02x",
session->cookie[0], session->cookie[1],
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index a1f24fb2be98..e7ea9c4b89ff 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -547,19 +547,19 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
}
if (tunnel->version > 2) {
- if (info->attrs[L2TP_ATTR_OFFSET])
- cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]);
-
if (info->attrs[L2TP_ATTR_DATA_SEQ])
cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
- cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT;
- if (info->attrs[L2TP_ATTR_L2SPEC_TYPE])
+ if (info->attrs[L2TP_ATTR_L2SPEC_TYPE]) {
cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]);
-
- cfg.l2specific_len = 4;
- if (info->attrs[L2TP_ATTR_L2SPEC_LEN])
- cfg.l2specific_len = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_LEN]);
+ if (cfg.l2specific_type != L2TP_L2SPECTYPE_DEFAULT &&
+ cfg.l2specific_type != L2TP_L2SPECTYPE_NONE) {
+ ret = -EINVAL;
+ goto out_tunnel;
+ }
+ } else {
+ cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT;
+ }
if (info->attrs[L2TP_ATTR_COOKIE]) {
u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
@@ -620,27 +620,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
goto out_tunnel;
}
- /* Check that pseudowire-specific params are present */
- switch (cfg.pw_type) {
- case L2TP_PWTYPE_NONE:
- break;
- case L2TP_PWTYPE_ETH_VLAN:
- if (!info->attrs[L2TP_ATTR_VLAN_ID]) {
- ret = -EINVAL;
- goto out_tunnel;
- }
- break;
- case L2TP_PWTYPE_ETH:
- break;
- case L2TP_PWTYPE_PPP:
- case L2TP_PWTYPE_PPP_AC:
- break;
- case L2TP_PWTYPE_IP:
- default:
- ret = -EPROTONOSUPPORT;
- break;
- }
-
ret = l2tp_nl_cmd_ops[cfg.pw_type]->session_create(net, tunnel,
session_id,
peer_session_id,
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index b412fc3351dc..59f246d7b290 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1734,7 +1734,6 @@ static int pppol2tp_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations pppol2tp_proc_fops = {
- .owner = THIS_MODULE,
.open = pppol2tp_proc_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 29c509c54bb2..66821e8a2b7a 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -225,7 +225,6 @@ static int llc_seq_core_open(struct inode *inode, struct file *file)
}
static const struct file_operations llc_seq_socket_fops = {
- .owner = THIS_MODULE,
.open = llc_seq_socket_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -233,7 +232,6 @@ static const struct file_operations llc_seq_socket_fops = {
};
static const struct file_operations llc_seq_core_fops = {
- .owner = THIS_MODULE,
.open = llc_seq_core_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index d444752dbf40..a8b1616cec41 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -153,27 +153,16 @@ EXPORT_SYMBOL(ieee80211_stop_rx_ba_session);
*/
static void sta_rx_agg_session_timer_expired(struct timer_list *t)
{
- struct tid_ampdu_rx *tid_rx_timer =
- from_timer(tid_rx_timer, t, session_timer);
- struct sta_info *sta = tid_rx_timer->sta;
- u8 tid = tid_rx_timer->tid;
- struct tid_ampdu_rx *tid_rx;
+ struct tid_ampdu_rx *tid_rx = from_timer(tid_rx, t, session_timer);
+ struct sta_info *sta = tid_rx->sta;
+ u8 tid = tid_rx->tid;
unsigned long timeout;
- rcu_read_lock();
- tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
- if (!tid_rx) {
- rcu_read_unlock();
- return;
- }
-
timeout = tid_rx->last_rx + TU_TO_JIFFIES(tid_rx->timeout);
if (time_is_after_jiffies(timeout)) {
mod_timer(&tid_rx->session_timer, timeout);
- rcu_read_unlock();
return;
}
- rcu_read_unlock();
ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n",
sta->sta.addr, tid);
@@ -415,10 +404,11 @@ end:
timeout);
}
-void __ieee80211_start_rx_ba_session(struct sta_info *sta,
- u8 dialog_token, u16 timeout,
- u16 start_seq_num, u16 ba_policy, u16 tid,
- u16 buf_size, bool tx, bool auto_seq)
+static void __ieee80211_start_rx_ba_session(struct sta_info *sta,
+ u8 dialog_token, u16 timeout,
+ u16 start_seq_num, u16 ba_policy,
+ u16 tid, u16 buf_size, bool tx,
+ bool auto_seq)
{
mutex_lock(&sta->ampdu_mlme.mtx);
___ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 5f8ab5be369f..595c662a61e8 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -392,7 +392,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
* telling the driver. New packets will not go through since
* the aggregation session is no longer OPERATIONAL.
*/
- synchronize_net();
+ if (!local->in_reconfig)
+ synchronize_net();
tid_tx->stop_initiator = reason == AGG_STOP_PEER_REQUEST ?
WLAN_BACK_RECIPIENT :
@@ -429,18 +430,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
*/
static void sta_addba_resp_timer_expired(struct timer_list *t)
{
- struct tid_ampdu_tx *tid_tx_timer =
- from_timer(tid_tx_timer, t, addba_resp_timer);
- struct sta_info *sta = tid_tx_timer->sta;
- u8 tid = tid_tx_timer->tid;
- struct tid_ampdu_tx *tid_tx;
+ struct tid_ampdu_tx *tid_tx = from_timer(tid_tx, t, addba_resp_timer);
+ struct sta_info *sta = tid_tx->sta;
+ u8 tid = tid_tx->tid;
/* check if the TID waits for addBA response */
- rcu_read_lock();
- tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
- if (!tid_tx ||
- test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) {
- rcu_read_unlock();
+ if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) {
ht_dbg(sta->sdata,
"timer expired on %pM tid %d not expecting addBA response\n",
sta->sta.addr, tid);
@@ -451,7 +446,6 @@ static void sta_addba_resp_timer_expired(struct timer_list *t)
sta->sta.addr, tid);
ieee80211_stop_tx_ba_session(&sta->sta, tid);
- rcu_read_unlock();
}
void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
@@ -529,29 +523,21 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
static void sta_tx_agg_session_timer_expired(struct timer_list *t)
{
- struct tid_ampdu_tx *tid_tx_timer =
- from_timer(tid_tx_timer, t, session_timer);
- struct sta_info *sta = tid_tx_timer->sta;
- u8 tid = tid_tx_timer->tid;
- struct tid_ampdu_tx *tid_tx;
+ struct tid_ampdu_tx *tid_tx = from_timer(tid_tx, t, session_timer);
+ struct sta_info *sta = tid_tx->sta;
+ u8 tid = tid_tx->tid;
unsigned long timeout;
- rcu_read_lock();
- tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
- if (!tid_tx || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
- rcu_read_unlock();
+ if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
return;
}
timeout = tid_tx->last_tx + TU_TO_JIFFIES(tid_tx->timeout);
if (time_is_after_jiffies(timeout)) {
mod_timer(&tid_tx->session_timer, timeout);
- rcu_read_unlock();
return;
}
- rcu_read_unlock();
-
ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n",
sta->sta.addr, tid);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index fb15d3b97cb2..46028e12e216 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -573,10 +573,12 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
case WLAN_CIPHER_SUITE_BIP_CMAC_256:
BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
offsetof(typeof(kseq), aes_cmac));
+ /* fall through */
case WLAN_CIPHER_SUITE_BIP_GMAC_128:
case WLAN_CIPHER_SUITE_BIP_GMAC_256:
BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
offsetof(typeof(kseq), aes_gmac));
+ /* fall through */
case WLAN_CIPHER_SUITE_GCMP:
case WLAN_CIPHER_SUITE_GCMP_256:
BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
@@ -2205,6 +2207,7 @@ static int ieee80211_scan(struct wiphy *wiphy,
* for now fall through to allow scanning only when
* beaconing hasn't been configured yet
*/
+ /* fall through */
case NL80211_IFTYPE_AP:
/*
* If the scan has been forced (and the driver supports
@@ -2373,10 +2376,17 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata;
enum nl80211_tx_power_setting txp_type = type;
bool update_txp_type = false;
+ bool has_monitor = false;
if (wdev) {
sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+ sdata = rtnl_dereference(local->monitor_sdata);
+ if (!sdata)
+ return -EOPNOTSUPP;
+ }
+
switch (type) {
case NL80211_TX_POWER_AUTOMATIC:
sdata->user_power_level = IEEE80211_UNSET_POWER_LEVEL;
@@ -2415,15 +2425,34 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
mutex_lock(&local->iflist_mtx);
list_for_each_entry(sdata, &local->interfaces, list) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+ has_monitor = true;
+ continue;
+ }
sdata->user_power_level = local->user_power_level;
if (txp_type != sdata->vif.bss_conf.txpower_type)
update_txp_type = true;
sdata->vif.bss_conf.txpower_type = txp_type;
}
- list_for_each_entry(sdata, &local->interfaces, list)
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
+ continue;
ieee80211_recalc_txpower(sdata, update_txp_type);
+ }
mutex_unlock(&local->iflist_mtx);
+ if (has_monitor) {
+ sdata = rtnl_dereference(local->monitor_sdata);
+ if (sdata) {
+ sdata->user_power_level = local->user_power_level;
+ if (txp_type != sdata->vif.bss_conf.txpower_type)
+ update_txp_type = true;
+ sdata->vif.bss_conf.txpower_type = txp_type;
+
+ ieee80211_recalc_txpower(sdata, update_txp_type);
+ }
+ }
+
return 0;
}
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 5fae001f286c..1f466d12a6bc 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -211,6 +211,7 @@ static const char *hw_flag_names[] = {
FLAG(TX_FRAG_LIST),
FLAG(REPORTS_LOW_ACK),
FLAG(SUPPORTS_TX_FRAG),
+ FLAG(SUPPORTS_TDLS_BUFFER_STA),
#undef FLAG
};
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index b15412c21ac9..444ea8d127fe 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -420,7 +420,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
default:
p += scnprintf(p, sizeof(buf) + buf - p,
"\t\tMAX-MPDU-UNKNOWN\n");
- };
+ }
switch (vhtc->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
case 0:
p += scnprintf(p, sizeof(buf) + buf - p,
@@ -438,7 +438,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
p += scnprintf(p, sizeof(buf) + buf - p,
"\t\tUNKNOWN-MHZ: 0x%x\n",
(vhtc->cap >> 2) & 0x3);
- };
+ }
PFLAG(RXLDPC, "RXLDPC");
PFLAG(SHORT_GI_80, "SHORT-GI-80");
PFLAG(SHORT_GI_160, "SHORT-GI-160");
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index c7f93fd9ca7a..4d82fe7d627c 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -165,7 +165,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE ||
sdata->vif.type == NL80211_IFTYPE_NAN ||
(sdata->vif.type == NL80211_IFTYPE_MONITOR &&
- !sdata->vif.mu_mimo_owner)))
+ !sdata->vif.mu_mimo_owner &&
+ !(changed & BSS_CHANGED_TXPOWER))))
return;
if (!check_sdata_in_driver(sdata))
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 41f5e48f8021..d7523530d3f8 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -291,13 +291,14 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
int i;
mutex_lock(&sta->ampdu_mlme.mtx);
- for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
- ___ieee80211_stop_tx_ba_session(sta, i, reason);
+ for (i = 0; i < IEEE80211_NUM_TIDS; i++)
___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
WLAN_REASON_QSTA_LEAVE_QBSS,
reason != AGG_STOP_DESTROY_STA &&
reason != AGG_STOP_PEER_REQUEST);
- }
+
+ for (i = 0; i < IEEE80211_NUM_TIDS; i++)
+ ___ieee80211_stop_tx_ba_session(sta, i, reason);
mutex_unlock(&sta->ampdu_mlme.mtx);
/* stopping might queue the work again - so cancel only afterwards */
@@ -491,6 +492,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
case IEEE80211_SMPS_AUTOMATIC:
case IEEE80211_SMPS_NUM_MODES:
WARN_ON(1);
+ /* fall through */
case IEEE80211_SMPS_OFF:
action_frame->u.action.u.ht_smps.smps_control =
WLAN_HT_SMPS_CONTROL_DISABLED;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 885d00b41911..26900025de2f 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1757,10 +1757,6 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
u16 initiator, u16 reason, bool stop);
void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
u16 initiator, u16 reason, bool stop);
-void __ieee80211_start_rx_ba_session(struct sta_info *sta,
- u8 dialog_token, u16 timeout,
- u16 start_seq_num, u16 ba_policy, u16 tid,
- u16 buf_size, bool tx, bool auto_seq);
void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
u8 dialog_token, u16 timeout,
u16 start_seq_num, u16 ba_policy, u16 tid,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 13b16f90e1cf..5fe01f82df12 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1474,7 +1474,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
break;
case NL80211_IFTYPE_UNSPECIFIED:
case NUM_NL80211_IFTYPES:
- BUG();
+ WARN_ON(1);
break;
}
@@ -1633,7 +1633,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
goto out_unlock;
}
}
- /* otherwise fall through */
+ /* fall through */
default:
/* assign a new address if possible -- try n_addresses first */
for (i = 0; i < local->hw.wiphy->n_addresses; i++) {
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 938049395f90..aee05ec3f7ea 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -178,13 +178,17 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
if (!ret) {
key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
- if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
+ if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+ IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) ||
(key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
decrease_tailroom_need_count(sdata, 1);
WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) &&
(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV));
+ WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_MIC_SPACE) &&
+ (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC));
+
return 0;
}
@@ -237,7 +241,8 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
sta = key->sta;
sdata = key->sdata;
- if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
+ if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+ IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) ||
(key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
increment_tailroom_need_count(sdata);
@@ -1104,7 +1109,8 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf)
if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
- if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
+ if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+ IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) ||
(key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
increment_tailroom_need_count(key->sdata);
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index e054a2fd8d38..0785d04a80bc 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -263,6 +263,9 @@ static void ieee80211_restart_work(struct work_struct *work)
flush_delayed_work(&local->roc_work);
flush_work(&local->hw_roc_done);
+ /* wait for all packet processing to be done */
+ synchronize_net();
+
ieee80211_reconfig(local);
rtnl_unlock();
}
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 5e27364e10ac..73ac607beb5d 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -989,8 +989,10 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.bss_conf.chandef.width) {
case NL80211_CHAN_WIDTH_20_NOHT:
sta_flags |= IEEE80211_STA_DISABLE_HT;
+ /* fall through */
case NL80211_CHAN_WIDTH_20:
sta_flags |= IEEE80211_STA_DISABLE_40MHZ;
+ /* fall through */
case NL80211_CHAN_WIDTH_40:
sta_flags |= IEEE80211_STA_DISABLE_VHT;
break;
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 4f7826d7b47c..35ad3983ae4b 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -797,7 +797,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
struct mesh_path *mpath;
u8 ttl, flags, hopcount;
const u8 *orig_addr;
- u32 orig_sn, metric, metric_txsta, interval;
+ u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval;
bool root_is_gate;
ttl = rann->rann_ttl;
@@ -808,7 +808,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
interval = le32_to_cpu(rann->rann_interval);
hopcount = rann->rann_hopcount;
hopcount++;
- metric = le32_to_cpu(rann->rann_metric);
+ orig_metric = le32_to_cpu(rann->rann_metric);
/* Ignore our own RANNs */
if (ether_addr_equal(orig_addr, sdata->vif.addr))
@@ -825,7 +825,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
return;
}
- metric_txsta = airtime_link_metric_get(local, sta);
+ last_hop_metric = airtime_link_metric_get(local, sta);
+ new_metric = orig_metric + last_hop_metric;
+ if (new_metric < orig_metric)
+ new_metric = MAX_METRIC;
mpath = mesh_path_lookup(sdata, orig_addr);
if (!mpath) {
@@ -838,7 +841,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
}
if (!(SN_LT(mpath->sn, orig_sn)) &&
- !(mpath->sn == orig_sn && metric < mpath->rann_metric)) {
+ !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) {
rcu_read_unlock();
return;
}
@@ -856,7 +859,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
}
mpath->sn = orig_sn;
- mpath->rann_metric = metric + metric_txsta;
+ mpath->rann_metric = new_metric;
mpath->is_root = true;
/* Recording RANNs sender address to send individually
* addressed PREQs destined for root mesh STA */
@@ -876,7 +879,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr,
orig_sn, 0, NULL, 0, broadcast_addr,
hopcount, ttl, interval,
- metric + metric_txsta, 0, sdata);
+ new_metric, 0, sdata);
}
rcu_read_unlock();
@@ -1247,6 +1250,7 @@ void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata)
break;
case IEEE80211_PROACTIVE_PREQ_WITH_PREP:
flags |= IEEE80211_PREQ_PROACTIVE_PREP_FLAG;
+ /* fall through */
case IEEE80211_PROACTIVE_PREQ_NO_PREP:
interval = ifmsh->mshcfg.dot11MeshHWMPactivePathToRootTimeout;
target_flags |= IEEE80211_PREQ_TO_FLAG |
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 86c8dfef56a4..a5125624a76d 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -257,9 +257,7 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
if (ret)
return NULL;
- ret = rhashtable_walk_start(&iter);
- if (ret && ret != -EAGAIN)
- goto err;
+ rhashtable_walk_start(&iter);
while ((mpath = rhashtable_walk_next(&iter))) {
if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -269,7 +267,6 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
if (i++ == idx)
break;
}
-err:
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
@@ -513,9 +510,7 @@ void mesh_plink_broken(struct sta_info *sta)
if (ret)
return;
- ret = rhashtable_walk_start(&iter);
- if (ret && ret != -EAGAIN)
- goto out;
+ rhashtable_walk_start(&iter);
while ((mpath = rhashtable_walk_next(&iter))) {
if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -535,7 +530,6 @@ void mesh_plink_broken(struct sta_info *sta)
WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast);
}
}
-out:
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
}
@@ -584,9 +578,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
if (ret)
return;
- ret = rhashtable_walk_start(&iter);
- if (ret && ret != -EAGAIN)
- goto out;
+ rhashtable_walk_start(&iter);
while ((mpath = rhashtable_walk_next(&iter))) {
if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -597,7 +589,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
if (rcu_access_pointer(mpath->next_hop) == sta)
__mesh_path_del(tbl, mpath);
}
-out:
+
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
}
@@ -614,9 +606,7 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
if (ret)
return;
- ret = rhashtable_walk_start(&iter);
- if (ret && ret != -EAGAIN)
- goto out;
+ rhashtable_walk_start(&iter);
while ((mpath = rhashtable_walk_next(&iter))) {
if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -627,7 +617,7 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
if (ether_addr_equal(mpath->mpp, proxy))
__mesh_path_del(tbl, mpath);
}
-out:
+
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
}
@@ -642,9 +632,7 @@ static void table_flush_by_iface(struct mesh_table *tbl)
if (ret)
return;
- ret = rhashtable_walk_start(&iter);
- if (ret && ret != -EAGAIN)
- goto out;
+ rhashtable_walk_start(&iter);
while ((mpath = rhashtable_walk_next(&iter))) {
if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -653,7 +641,7 @@ static void table_flush_by_iface(struct mesh_table *tbl)
break;
__mesh_path_del(tbl, mpath);
}
-out:
+
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
}
@@ -873,9 +861,7 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
if (ret)
return;
- ret = rhashtable_walk_start(&iter);
- if (ret && ret != -EAGAIN)
- goto out;
+ rhashtable_walk_start(&iter);
while ((mpath = rhashtable_walk_next(&iter))) {
if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -887,7 +873,7 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE))
__mesh_path_del(tbl, mpath);
}
-out:
+
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
}
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index e2d00cce3c17..0f6c9ca59062 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -672,7 +672,7 @@ void mesh_plink_timer(struct timer_list *t)
break;
}
reason = WLAN_REASON_MESH_MAX_RETRIES;
- /* fall through on else */
+ /* fall through */
case NL80211_PLINK_CNF_RCVD:
/* confirm timer */
if (!reason)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 04460440d731..39b660b9a908 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -473,6 +473,7 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
case IEEE80211_SMPS_AUTOMATIC:
case IEEE80211_SMPS_NUM_MODES:
WARN_ON(1);
+ /* fall through */
case IEEE80211_SMPS_OFF:
cap |= WLAN_HT_CAP_SM_PS_DISABLED <<
IEEE80211_HT_CAP_SM_PS_SHIFT;
@@ -895,7 +896,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_hdr_3addr *nullfunc;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif);
+ skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true);
if (!skb)
return;
@@ -2861,10 +2862,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
aid = le16_to_cpu(mgmt->u.assoc_resp.aid);
capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
- if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14)))
- sdata_info(sdata, "invalid AID value 0x%x; bits 15:14 not set\n",
- aid);
- aid &= ~(BIT(15) | BIT(14));
+ /*
+ * The 5 MSB of the AID field are reserved
+ * (802.11-2016 9.4.1.8 AID field)
+ */
+ aid &= 0x7ff;
ifmgd->broken_ap = false;
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index faf4f6055000..f1d40b6645ff 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -801,14 +801,14 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
case NL80211_IFTYPE_ADHOC:
if (!sdata->vif.bss_conf.ibss_joined)
need_offchan = true;
- /* fall through */
#ifdef CONFIG_MAC80211_MESH
+ /* fall through */
case NL80211_IFTYPE_MESH_POINT:
if (ieee80211_vif_is_mesh(&sdata->vif) &&
!sdata->u.mesh.mesh_id_len)
need_offchan = true;
- /* fall through */
#endif
+ /* fall through */
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_P2P_GO:
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 70e9d2ca8bbe..fd580614085b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1607,23 +1607,16 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
/*
* Change STA power saving mode only at the end of a frame
- * exchange sequence.
+ * exchange sequence, and only for a data or management
+ * frame as specified in IEEE 802.11-2016 11.2.3.2
*/
if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) &&
!ieee80211_has_morefrags(hdr->frame_control) &&
- !ieee80211_is_back_req(hdr->frame_control) &&
+ (ieee80211_is_mgmt(hdr->frame_control) ||
+ ieee80211_is_data(hdr->frame_control)) &&
!(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
(rx->sdata->vif.type == NL80211_IFTYPE_AP ||
- rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
- /*
- * PM bit is only checked in frames where it isn't reserved,
- * in AP mode it's reserved in non-bufferable management frames
- * (cf. IEEE 802.11-2012 8.2.4.1.7 Power Management field)
- * BAR frames should be ignored as specified in
- * IEEE 802.11-2012 10.2.1.2.
- */
- (!ieee80211_is_mgmt(hdr->frame_control) ||
- ieee80211_is_bufferable_mmpdu(hdr->frame_control))) {
+ rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) {
if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
if (!ieee80211_has_pm(hdr->frame_control))
sta_ps_end(sta);
@@ -3632,6 +3625,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
}
return true;
case NL80211_IFTYPE_MESH_POINT:
+ if (ether_addr_equal(sdata->vif.addr, hdr->addr2))
+ return false;
if (multicast)
return true;
return ether_addr_equal(sdata->vif.addr, hdr->addr1);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 91093d4a2f84..5cd5e6e5834e 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -47,6 +47,8 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata,
NL80211_FEATURE_TDLS_CHANNEL_SWITCH;
bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) &&
!ifmgd->tdls_wider_bw_prohibited;
+ bool buffer_sta = ieee80211_hw_check(&local->hw,
+ SUPPORTS_TDLS_BUFFER_STA);
struct ieee80211_supported_band *sband = ieee80211_get_sband(sdata);
bool vht = sband && sband->vht_cap.vht_supported;
u8 *pos = skb_put(skb, 10);
@@ -56,7 +58,8 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata,
*pos++ = 0x0;
*pos++ = 0x0;
*pos++ = 0x0;
- *pos++ = chan_switch ? WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH : 0;
+ *pos++ = (chan_switch ? WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH : 0) |
+ (buffer_sta ? WLAN_EXT_CAPA4_TDLS_BUFFER_STA : 0);
*pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED;
*pos++ = 0;
*pos++ = 0;
@@ -236,6 +239,7 @@ static enum ieee80211_ac_numbers ieee80211_ac_from_wmm(int ac)
switch (ac) {
default:
WARN_ON_ONCE(1);
+ /* fall through */
case 0:
return IEEE80211_AC_BE;
case 1:
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7b8154474b9e..25904af38839 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2922,7 +2922,9 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
gen_iv = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV;
iv_spc = build.key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE;
- mmic = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC;
+ mmic = build.key->conf.flags &
+ (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+ IEEE80211_KEY_FLAG_PUT_MIC_SPACE);
/* don't handle software crypto */
if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
@@ -4438,13 +4440,15 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
EXPORT_SYMBOL(ieee80211_pspoll_get);
struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif)
+ struct ieee80211_vif *vif,
+ bool qos_ok)
{
struct ieee80211_hdr_3addr *nullfunc;
struct ieee80211_sub_if_data *sdata;
struct ieee80211_if_managed *ifmgd;
struct ieee80211_local *local;
struct sk_buff *skb;
+ bool qos = false;
if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
return NULL;
@@ -4453,7 +4457,17 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
ifmgd = &sdata->u.mgd;
local = sdata->local;
- skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc));
+ if (qos_ok) {
+ struct sta_info *sta;
+
+ rcu_read_lock();
+ sta = sta_info_get(sdata, ifmgd->bssid);
+ qos = sta && sta->sta.wme;
+ rcu_read_unlock();
+ }
+
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+ sizeof(*nullfunc) + 2);
if (!skb)
return NULL;
@@ -4463,6 +4477,19 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
IEEE80211_STYPE_NULLFUNC |
IEEE80211_FCTL_TODS);
+ if (qos) {
+ __le16 qos = cpu_to_le16(7);
+
+ BUILD_BUG_ON((IEEE80211_STYPE_QOS_NULLFUNC |
+ IEEE80211_STYPE_NULLFUNC) !=
+ IEEE80211_STYPE_QOS_NULLFUNC);
+ nullfunc->frame_control |=
+ cpu_to_le16(IEEE80211_STYPE_QOS_NULLFUNC);
+ skb->priority = 7;
+ skb_set_queue_mapping(skb, IEEE80211_AC_VO);
+ skb_put_data(skb, &qos, sizeof(qos));
+ }
+
memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN);
memcpy(nullfunc->addr2, vif->addr, ETH_ALEN);
memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index d57e5f6bd8b6..1f82191ce601 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2110,15 +2110,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy, 0);
wake_up:
- if (local->in_reconfig) {
- local->in_reconfig = false;
- barrier();
-
- /* Restart deferred ROCs */
- mutex_lock(&local->mtx);
- ieee80211_start_next_roc(local);
- mutex_unlock(&local->mtx);
- }
if (local->monitors == local->open_count && local->monitors > 0)
ieee80211_add_virtual_monitor(local);
@@ -2146,6 +2137,16 @@ int ieee80211_reconfig(struct ieee80211_local *local)
mutex_unlock(&local->sta_mtx);
}
+ if (local->in_reconfig) {
+ local->in_reconfig = false;
+ barrier();
+
+ /* Restart deferred ROCs */
+ mutex_lock(&local->mtx);
+ ieee80211_start_next_roc(local);
+ mutex_unlock(&local->mtx);
+ }
+
ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_SUSPEND,
false);
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 3e3d3014e9ab..5f7c96368b11 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -165,6 +165,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
qos = sta->sta.wme;
break;
}
+ /* fall through */
case NL80211_IFTYPE_AP:
ra = skb->data;
break;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index b58722d9de37..785056cb76f6 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -1,7 +1,7 @@
/*
* Copyright 2002-2004, Instant802 Networks, Inc.
* Copyright 2008, Jouni Malinen <j@w1.fi>
- * Copyright (C) 2016 Intel Deutschland GmbH
+ * Copyright (C) 2016-2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -59,8 +59,9 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
if (info->control.hw_key &&
(info->flags & IEEE80211_TX_CTL_DONTFRAG ||
ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) &&
- !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) {
- /* hwaccel - with no need for SW-generated MMIC */
+ !(tx->key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
+ IEEE80211_KEY_FLAG_PUT_MIC_SPACE))) {
+ /* hwaccel - with no need for SW-generated MMIC or MIC space */
return TX_CONTINUE;
}
@@ -75,8 +76,15 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
skb_tailroom(skb), tail))
return TX_DROP;
- key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY];
mic = skb_put(skb, MICHAEL_MIC_LEN);
+
+ if (tx->key->conf.flags & IEEE80211_KEY_FLAG_PUT_MIC_SPACE) {
+ /* Zeroed MIC can help with debug */
+ memset(mic, 0, MICHAEL_MIC_LEN);
+ return TX_CONTINUE;
+ }
+
+ key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY];
michael_mic(key, hdr, data, data_len, mic);
if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE))
mic[0]++;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 8ca9915befc8..5dce8336d33f 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2510,12 +2510,15 @@ static int __init mpls_init(void)
rtnl_af_register(&mpls_af_ops);
- rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0);
- rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0);
- rtnl_register(PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes,
- 0);
- rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
- mpls_netconf_dump_devconf, 0);
+ rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_NEWROUTE,
+ mpls_rtm_newroute, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_DELROUTE,
+ mpls_rtm_delroute, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETROUTE,
+ mpls_getroute, mpls_dump_routes, 0);
+ rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETNETCONF,
+ mpls_netconf_get_devconf,
+ mpls_netconf_dump_devconf, 0);
err = ipgre_tunnel_encap_add_mpls_ops();
if (err)
pr_err("Can't add mpls over gre tunnel ops\n");
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 67e708e98ccf..e7b05de1e6d1 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -143,43 +143,14 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
if (!nc)
return -ENODEV;
- /* If the channel is active one, we need reconfigure it */
spin_lock_irqsave(&nc->lock, flags);
ncm = &nc->modes[NCSI_MODE_LINK];
hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
ncm->data[3] = ntohl(hncdsc->status);
- netdev_info(ndp->ndev.dev, "NCSI: HNCDSC AEN - channel %u state %s\n",
- nc->id, ncm->data[3] & 0x3 ? "up" : "down");
- if (!list_empty(&nc->link) ||
- nc->state != NCSI_CHANNEL_ACTIVE) {
- spin_unlock_irqrestore(&nc->lock, flags);
- return 0;
- }
-
- spin_unlock_irqrestore(&nc->lock, flags);
- if (!(ndp->flags & NCSI_DEV_HWA) && !(ncm->data[3] & 0x1))
- ndp->flags |= NCSI_DEV_RESHUFFLE;
-
- /* If this channel is the active one and the link doesn't
- * work, we have to choose another channel to be active one.
- * The logic here is exactly similar to what we do when link
- * is down on the active channel.
- *
- * On the other hand, we need configure it when host driver
- * state on the active channel becomes ready.
- */
- ncsi_stop_channel_monitor(nc);
-
- spin_lock_irqsave(&nc->lock, flags);
- nc->state = (ncm->data[3] & 0x1) ? NCSI_CHANNEL_INACTIVE :
- NCSI_CHANNEL_ACTIVE;
spin_unlock_irqrestore(&nc->lock, flags);
-
- spin_lock_irqsave(&ndp->lock, flags);
- list_add_tail_rcu(&nc->link, &ndp->channel_queue);
- spin_unlock_irqrestore(&ndp->lock, flags);
-
- ncsi_process_next_channel(ndp);
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: host driver %srunning on channel %u\n",
+ ncm->data[3] & 0x1 ? "" : "not ", nc->id);
return 0;
}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e4a13cc8a2e7..9019fa98003d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -12,6 +12,12 @@ config NETFILTER_INGRESS
config NETFILTER_NETLINK
tristate
+config NETFILTER_FAMILY_BRIDGE
+ bool
+
+config NETFILTER_FAMILY_ARP
+ bool
+
config NETFILTER_NETLINK_ACCT
tristate "Netfilter NFACCT over NFNETLINK interface"
depends on NETFILTER_ADVANCED
@@ -62,6 +68,8 @@ config NF_LOG_NETDEV
select NF_LOG_COMMON
if NF_CONNTRACK
+config NETFILTER_CONNCOUNT
+ tristate
config NF_CONNTRACK_MARK
bool 'Connection mark tracking support'
@@ -497,6 +505,13 @@ config NFT_CT
This option adds the "ct" expression that you can use to match
connection tracking information such as the flow state.
+config NFT_FLOW_OFFLOAD
+ depends on NF_CONNTRACK && NF_FLOW_TABLE
+ tristate "Netfilter nf_tables hardware flow offload module"
+ help
+ This option adds the "flow_offload" expression that you can use to
+ choose what flows are placed into the hardware.
+
config NFT_SET_RBTREE
tristate "Netfilter nf_tables rbtree set module"
help
@@ -649,6 +664,23 @@ endif # NF_TABLES_NETDEV
endif # NF_TABLES
+config NF_FLOW_TABLE_INET
+ tristate "Netfilter flow table mixed IPv4/IPv6 module"
+ depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6
+ select NF_FLOW_TABLE
+ help
+ This option adds the flow table mixed IPv4/IPv6 support.
+
+ To compile it as a module, choose M here.
+
+config NF_FLOW_TABLE
+ tristate "Netfilter flow table module"
+ depends on NF_CONNTRACK && NF_TABLES
+ help
+ This option adds the flow table core infrastructure.
+
+ To compile it as a module, choose M here.
+
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
@@ -1120,6 +1152,7 @@ config NETFILTER_XT_MATCH_CONNLIMIT
tristate '"connlimit" match support'
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
+ select NETFILTER_CONNCOUNT
---help---
This match allows you to match against the number of parallel
connections to a server per client IP address (or address block).
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index f78ed2470831..5d9b8b959e58 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
+netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
@@ -67,6 +67,8 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
# SYNPROXY
obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
+obj-$(CONFIG_NETFILTER_CONNCOUNT) += nf_conncount.o
+
# generic packet duplication from netdev family
obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
@@ -84,6 +86,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o
obj-$(CONFIG_NFT_RT) += nft_rt.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
+obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
obj-$(CONFIG_NFT_OBJREF) += nft_objref.o
@@ -107,6 +110,10 @@ obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
+# flow table infrastructure
+obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
+obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
+
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 52cd2901a097..0f6b8172fb9a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -4,8 +4,7 @@
* Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
* way.
*
- * Rusty Russell (C)2000 -- This code is GPL.
- * Patrick McHardy (c) 2006-2012
+ * This code is GPL.
*/
#include <linux/kernel.h>
#include <linux/netfilter.h>
@@ -28,34 +27,12 @@
#include "nf_internals.h"
-static DEFINE_MUTEX(afinfo_mutex);
-
-const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
-EXPORT_SYMBOL(nf_afinfo);
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);
DEFINE_PER_CPU(bool, nf_skb_duplicated);
EXPORT_SYMBOL_GPL(nf_skb_duplicated);
-int nf_register_afinfo(const struct nf_afinfo *afinfo)
-{
- mutex_lock(&afinfo_mutex);
- RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
- mutex_unlock(&afinfo_mutex);
- return 0;
-}
-EXPORT_SYMBOL_GPL(nf_register_afinfo);
-
-void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
-{
- mutex_lock(&afinfo_mutex);
- RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
- mutex_unlock(&afinfo_mutex);
- synchronize_rcu();
-}
-EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
-
#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
@@ -74,7 +51,8 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
struct nf_hook_entries *e;
size_t alloc = sizeof(*e) +
sizeof(struct nf_hook_entry) * num +
- sizeof(struct nf_hook_ops *) * num;
+ sizeof(struct nf_hook_ops *) * num +
+ sizeof(struct nf_hook_entries_rcu_head);
if (num == 0)
return NULL;
@@ -85,6 +63,30 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
return e;
}
+static void __nf_hook_entries_free(struct rcu_head *h)
+{
+ struct nf_hook_entries_rcu_head *head;
+
+ head = container_of(h, struct nf_hook_entries_rcu_head, head);
+ kvfree(head->allocation);
+}
+
+static void nf_hook_entries_free(struct nf_hook_entries *e)
+{
+ struct nf_hook_entries_rcu_head *head;
+ struct nf_hook_ops **ops;
+ unsigned int num;
+
+ if (!e)
+ return;
+
+ num = e->num_hook_entries;
+ ops = nf_hook_entries_get_hook_ops(e);
+ head = (void *)&ops[num];
+ head->allocation = e;
+ call_rcu(&head->head, __nf_hook_entries_free);
+}
+
static unsigned int accept_all(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -135,6 +137,12 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
++i;
continue;
}
+
+ if (reg->nat_hook && orig_ops[i]->nat_hook) {
+ kvfree(new);
+ return ERR_PTR(-EBUSY);
+ }
+
if (inserted || reg->priority > orig_ops[i]->priority) {
new_ops[nhooks] = (void *)orig_ops[i];
new->hooks[nhooks] = old->hooks[i];
@@ -237,27 +245,61 @@ out_assign:
return old;
}
-static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entries __rcu **
+nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
+ struct net_device *dev)
{
- if (reg->pf != NFPROTO_NETDEV)
- return net->nf.hooks[reg->pf]+reg->hooknum;
+ switch (pf) {
+ case NFPROTO_NETDEV:
+ break;
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ case NFPROTO_ARP:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum))
+ return NULL;
+ return net->nf.hooks_arp + hooknum;
+#endif
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ case NFPROTO_BRIDGE:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
+ return NULL;
+ return net->nf.hooks_bridge + hooknum;
+#endif
+ case NFPROTO_IPV4:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
+ return NULL;
+ return net->nf.hooks_ipv4 + hooknum;
+ case NFPROTO_IPV6:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
+ return NULL;
+ return net->nf.hooks_ipv6 + hooknum;
+#if IS_ENABLED(CONFIG_DECNET)
+ case NFPROTO_DECNET:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
+ return NULL;
+ return net->nf.hooks_decnet + hooknum;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->hooknum == NF_NETDEV_INGRESS) {
- if (reg->dev && dev_net(reg->dev) == net)
- return &reg->dev->nf_hooks_ingress;
+ if (hooknum == NF_NETDEV_INGRESS) {
+ if (dev && dev_net(dev) == net)
+ return &dev->nf_hooks_ingress;
}
#endif
WARN_ON_ONCE(1);
return NULL;
}
-int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
+static int __nf_register_net_hook(struct net *net, int pf,
+ const struct nf_hook_ops *reg)
{
struct nf_hook_entries *p, *new_hooks;
struct nf_hook_entries __rcu **pp;
- if (reg->pf == NFPROTO_NETDEV) {
+ if (pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
if (reg->hooknum == NF_NETDEV_INGRESS)
return -EOPNOTSUPP;
@@ -267,7 +309,7 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
return -EINVAL;
}
- pp = nf_hook_entry_head(net, reg);
+ pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
if (!pp)
return -EINVAL;
@@ -285,21 +327,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
+ if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
- static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
+ static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
#endif
- synchronize_net();
BUG_ON(p == new_hooks);
- kvfree(p);
+ nf_hook_entries_free(p);
return 0;
}
-EXPORT_SYMBOL(nf_register_net_hook);
/*
- * __nf_unregister_net_hook - remove a hook from blob
+ * nf_remove_net_hook - remove a hook from blob
*
* @oldp: current address of hook blob
* @unreg: hook to unregister
@@ -307,8 +347,8 @@ EXPORT_SYMBOL(nf_register_net_hook);
* This cannot fail, hook unregistration must always succeed.
* Therefore replace the to-be-removed hook with a dummy hook.
*/
-static void __nf_unregister_net_hook(struct nf_hook_entries *old,
- const struct nf_hook_ops *unreg)
+static void nf_remove_net_hook(struct nf_hook_entries *old,
+ const struct nf_hook_ops *unreg, int pf)
{
struct nf_hook_ops **orig_ops;
bool found = false;
@@ -326,24 +366,24 @@ static void __nf_unregister_net_hook(struct nf_hook_entries *old,
if (found) {
#ifdef CONFIG_NETFILTER_INGRESS
- if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
+ if (pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
net_dec_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
- static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
+ static_key_slow_dec(&nf_hooks_needed[pf][unreg->hooknum]);
#endif
} else {
- WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
+ WARN_ONCE(1, "hook not found, pf %d num %d", pf, unreg->hooknum);
}
}
-void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+static void __nf_unregister_net_hook(struct net *net, int pf,
+ const struct nf_hook_ops *reg)
{
struct nf_hook_entries __rcu **pp;
struct nf_hook_entries *p;
- unsigned int nfq;
- pp = nf_hook_entry_head(net, reg);
+ pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
if (!pp)
return;
@@ -355,23 +395,52 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
return;
}
- __nf_unregister_net_hook(p, reg);
+ nf_remove_net_hook(p, reg, pf);
p = __nf_hook_entries_try_shrink(pp);
mutex_unlock(&nf_hook_mutex);
if (!p)
return;
- synchronize_net();
+ nf_queue_nf_hook_drop(net);
+ nf_hook_entries_free(p);
+}
- /* other cpu might still process nfqueue verdict that used reg */
- nfq = nf_queue_nf_hook_drop(net);
- if (nfq)
- synchronize_net();
- kvfree(p);
+void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+ if (reg->pf == NFPROTO_INET) {
+ __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
+ __nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
+ } else {
+ __nf_unregister_net_hook(net, reg->pf, reg);
+ }
}
EXPORT_SYMBOL(nf_unregister_net_hook);
+int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+ int err;
+
+ if (reg->pf == NFPROTO_INET) {
+ err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
+ if (err < 0)
+ return err;
+
+ err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
+ if (err < 0) {
+ __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
+ return err;
+ }
+ } else {
+ err = __nf_register_net_hook(net, reg->pf, reg);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(nf_register_net_hook);
+
int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int n)
{
@@ -395,63 +464,10 @@ EXPORT_SYMBOL(nf_register_net_hooks);
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int hookcount)
{
- struct nf_hook_entries *to_free[16], *p;
- struct nf_hook_entries __rcu **pp;
- unsigned int i, j, n;
-
- mutex_lock(&nf_hook_mutex);
- for (i = 0; i < hookcount; i++) {
- pp = nf_hook_entry_head(net, &reg[i]);
- if (!pp)
- continue;
-
- p = nf_entry_dereference(*pp);
- if (WARN_ON_ONCE(!p))
- continue;
- __nf_unregister_net_hook(p, &reg[i]);
- }
- mutex_unlock(&nf_hook_mutex);
-
- do {
- n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
-
- mutex_lock(&nf_hook_mutex);
-
- for (i = 0, j = 0; i < hookcount && j < n; i++) {
- pp = nf_hook_entry_head(net, &reg[i]);
- if (!pp)
- continue;
-
- p = nf_entry_dereference(*pp);
- if (!p)
- continue;
-
- to_free[j] = __nf_hook_entries_try_shrink(pp);
- if (to_free[j])
- ++j;
- }
-
- mutex_unlock(&nf_hook_mutex);
-
- if (j) {
- unsigned int nfq;
-
- synchronize_net();
-
- /* need 2nd synchronize_net() if nfqueue is used, skb
- * can get reinjected right before nf_queue_hook_drop()
- */
- nfq = nf_queue_nf_hook_drop(net);
- if (nfq)
- synchronize_net();
-
- for (i = 0; i < j; i++)
- kvfree(to_free[i]);
- }
+ unsigned int i;
- reg += n;
- hookcount -= n;
- } while (hookcount > 0);
+ for (i = 0; i < hookcount; i++)
+ nf_unregister_net_hook(net, &reg[i]);
}
EXPORT_SYMBOL(nf_unregister_net_hooks);
@@ -569,14 +585,27 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif
-static int __net_init netfilter_net_init(struct net *net)
+static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max)
{
- int i, h;
+ int h;
- for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
- for (h = 0; h < NF_MAX_HOOKS; h++)
- RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
- }
+ for (h = 0; h < max; h++)
+ RCU_INIT_POINTER(e[h], NULL);
+}
+
+static int __net_init netfilter_net_init(struct net *net)
+{
+ __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
+ __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
+#endif
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
+#endif
+#if IS_ENABLED(CONFIG_DECNET)
+ __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
+#endif
#ifdef CONFIG_PROC_FS
net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 5ca18f07683b..257ca393e6f2 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -127,14 +127,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (ret <= 0)
return ret;
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(x, set)))
- return 0;
- if (SET_WITH_COUNTER(set))
- ip_set_update_counter(ext_counter(x, set), ext, mext, flags);
- if (SET_WITH_SKBINFO(set))
- ip_set_get_skbinfo(ext_skbinfo(x, set), ext, mext, flags);
- return 1;
+ return ip_set_match_extensions(set, ext, mext, flags, x);
}
static int
@@ -227,6 +220,7 @@ mtype_list(const struct ip_set *set,
rcu_read_lock();
for (; cb->args[IPSET_CB_ARG0] < map->elements;
cb->args[IPSET_CB_ARG0]++) {
+ cond_resched_rcu();
id = cb->args[IPSET_CB_ARG0];
x = get_ext(set, map, id);
if (!test_bit(id, map->members) ||
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index d8975a0b4282..488d6d05c65c 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -263,12 +263,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
if (ret)
return ret;
- if (first_ip > last_ip) {
- u32 tmp = first_ip;
-
- first_ip = last_ip;
- last_ip = tmp;
- }
+ if (first_ip > last_ip)
+ swap(first_ip, last_ip);
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 4c279fbd2d5d..c00b6a2e8e3c 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -337,12 +337,8 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
if (ret)
return ret;
- if (first_ip > last_ip) {
- u32 tmp = first_ip;
-
- first_ip = last_ip;
- last_ip = tmp;
- }
+ if (first_ip > last_ip)
+ swap(first_ip, last_ip);
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 7f9bbd7c98b5..b561ca8b3659 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -238,12 +238,8 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
- if (first_port > last_port) {
- u16 tmp = first_port;
-
- first_port = last_port;
- last_port = tmp;
- }
+ if (first_port > last_port)
+ swap(first_port, last_port);
elements = last_port - first_port + 1;
set->dsize = ip_set_elem_len(set, tb, 0, 0);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index cf84f7b37cd9..975a85a48d39 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -57,7 +57,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
/* When the nfnl mutex is held: */
#define ip_set_dereference(p) \
- rcu_dereference_protected(p, 1)
+ rcu_dereference_protected(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
#define ip_set(inst, id) \
ip_set_dereference((inst)->ip_set_list)[id]
@@ -472,6 +472,31 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
}
EXPORT_SYMBOL_GPL(ip_set_put_extensions);
+bool
+ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags, void *data)
+{
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(data, set)))
+ return false;
+ if (SET_WITH_COUNTER(set)) {
+ struct ip_set_counter *counter = ext_counter(data, set);
+
+ if (flags & IPSET_FLAG_MATCH_COUNTERS &&
+ !(ip_set_match_counter(ip_set_get_packets(counter),
+ mext->packets, mext->packets_op) &&
+ ip_set_match_counter(ip_set_get_bytes(counter),
+ mext->bytes, mext->bytes_op)))
+ return false;
+ ip_set_update_counter(counter, ext, flags);
+ }
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(data, set),
+ ext, mext, flags);
+ return true;
+}
+EXPORT_SYMBOL_GPL(ip_set_match_extensions);
+
/* Creating/destroying/renaming/swapping affect the existence and
* the properties of a set. All of these can be executed from userspace
* only and serialized by the nfnl mutex indirectly from nfnetlink.
@@ -1386,11 +1411,9 @@ dump_last:
goto next_set;
if (set->variant->uref)
set->variant->uref(set, cb, true);
- /* Fall through and add elements */
+ /* fall through */
default:
- rcu_read_lock_bh();
ret = set->variant->list(set, skb, cb);
- rcu_read_unlock_bh();
if (!cb->args[IPSET_CB_ARG0])
/* Set is done, proceed with next one */
goto next_set;
@@ -2055,6 +2078,7 @@ ip_set_net_exit(struct net *net)
inst->is_deleted = true; /* flag for ip_set_nfnl_put */
+ nfnl_lock(NFNL_SUBSYS_IPSET);
for (i = 0; i < inst->ip_set_max; i++) {
set = ip_set(inst, i);
if (set) {
@@ -2062,6 +2086,7 @@ ip_set_net_exit(struct net *net)
ip_set_destroy_set(set);
}
}
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
kfree(rcu_dereference_protected(inst->ip_set_list, 1));
}
@@ -2097,7 +2122,6 @@ ip_set_init(void)
return ret;
}
- pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL);
return 0;
}
@@ -2113,3 +2137,5 @@ ip_set_fini(void)
module_init(ip_set_init);
module_exit(ip_set_fini);
+
+MODULE_DESCRIPTION("ip_set: protocol " __stringify(IPSET_PROTOCOL));
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index efffc8eabafe..bbad940c0137 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -917,12 +917,9 @@ static inline int
mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
struct ip_set_ext *mext, struct ip_set *set, u32 flags)
{
- if (SET_WITH_COUNTER(set))
- ip_set_update_counter(ext_counter(data, set),
- ext, mext, flags);
- if (SET_WITH_SKBINFO(set))
- ip_set_get_skbinfo(ext_skbinfo(data, set),
- ext, mext, flags);
+ if (!ip_set_match_extensions(set, ext, mext, flags, data))
+ return 0;
+ /* nomatch entries return -ENOTEMPTY */
return mtype_do_data_match(data);
}
@@ -941,9 +938,9 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
struct mtype_elem *data;
#if IPSET_NET_COUNT == 2
struct mtype_elem orig = *d;
- int i, j = 0, k;
+ int ret, i, j = 0, k;
#else
- int i, j = 0;
+ int ret, i, j = 0;
#endif
u32 key, multi = 0;
@@ -969,18 +966,13 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
data = ahash_data(n, i, set->dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
- if (SET_WITH_TIMEOUT(set)) {
- if (!ip_set_timeout_expired(
- ext_timeout(data, set)))
- return mtype_data_match(data, ext,
- mext, set,
- flags);
+ ret = mtype_data_match(data, ext, mext, set, flags);
+ if (ret != 0)
+ return ret;
#ifdef IP_SET_HASH_WITH_MULTI
- multi = 0;
+ /* No match, reset multiple match flag */
+ multi = 0;
#endif
- } else
- return mtype_data_match(data, ext,
- mext, set, flags);
}
#if IPSET_NET_COUNT == 2
}
@@ -1027,12 +1019,11 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (!test_bit(i, n->used))
continue;
data = ahash_data(n, i, set->dsize);
- if (mtype_data_equal(data, d, &multi) &&
- !(SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)))) {
- ret = mtype_data_match(data, ext, mext, set, flags);
+ if (!mtype_data_equal(data, d, &multi))
+ continue;
+ ret = mtype_data_match(data, ext, mext, set, flags);
+ if (ret != 0)
goto out;
- }
}
out:
return ret;
@@ -1143,6 +1134,7 @@ mtype_list(const struct ip_set *set,
rcu_read_lock();
for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
cb->args[IPSET_CB_ARG0]++) {
+ cond_resched_rcu();
incomplete = skb_tail_pointer(skb);
n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
pr_debug("cb->arg bucket: %lu, t %p n %p\n",
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 0f164e986bf1..88b83d6d3084 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -168,7 +168,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
+ u32 ip2_from = 0, ip2_to = 0, ip2;
bool with_ports = false;
u8 cidr;
int ret;
@@ -269,22 +269,21 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1);
}
- if (retried)
+ if (retried) {
ip = ntohl(h->next.ip);
+ p = ntohs(h->next.port);
+ ip2 = ntohl(h->next.ip2);
+ } else {
+ p = port;
+ ip2 = ip2_from;
+ }
for (; ip <= ip_to; ip++) {
e.ip = htonl(ip);
- p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
- : port;
for (; p <= port_to; p++) {
e.port = htons(p);
- ip2 = retried &&
- ip == ntohl(h->next.ip) &&
- p == ntohs(h->next.port)
- ? ntohl(h->next.ip2) : ip2_from;
- while (ip2 <= ip2_to) {
+ do {
e.ip2 = htonl(ip2);
- ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
- &cidr);
+ ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
e.cidr = cidr - 1;
ret = adtfn(set, &e, &ext, &ext, flags);
@@ -292,9 +291,10 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
ret = 0;
- ip2 = ip2_last + 1;
- }
+ } while (ip2++ < ip2_to);
+ ip2 = ip2_from;
}
+ p = port;
}
return ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 1c67a1761e45..5449e23af13a 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -143,7 +143,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, last;
+ u32 ip = 0, ip_to = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -193,16 +193,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
}
if (retried)
ip = ntohl(h->next.ip);
- while (ip <= ip_to) {
+ do {
e.ip = htonl(ip);
- last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+ ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
ret = 0;
- ip = last + 1;
- }
+ } while (ip++ < ip_to);
return ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index d417074f1c1a..f5164c1efce2 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -200,7 +200,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, last;
+ u32 ip = 0, ip_to = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -255,17 +255,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- while (ip <= ip_to) {
+ do {
e.ip = htonl(ip);
- last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+ ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
ret = 0;
- ip = last + 1;
- }
+ } while (ip++ < ip_to);
return ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 7f9ae2e9645b..5a2b923bd81f 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -169,8 +169,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, last;
- u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2;
+ u32 ip = 0, ip_to = 0;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -247,27 +247,27 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
- if (retried)
+ if (retried) {
ip = ntohl(h->next.ip[0]);
+ ip2 = ntohl(h->next.ip[1]);
+ } else {
+ ip2 = ip2_from;
+ }
- while (ip <= ip_to) {
+ do {
e.ip[0] = htonl(ip);
- last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
- ip2 = (retried &&
- ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1])
- : ip2_from;
- while (ip2 <= ip2_to) {
+ ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+ do {
e.ip[1] = htonl(ip2);
- last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
+ ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
ret = 0;
- ip2 = last2 + 1;
- }
- ip = last + 1;
- }
+ } while (ip2++ < ip2_to);
+ ip2 = ip2_from;
+ } while (ip++ < ip_to);
return ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index e6ef382febe4..1a187be9ebc8 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -161,7 +161,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 port, port_to, p = 0, ip = 0, ip_to = 0, last;
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -239,25 +239,26 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
}
- if (retried)
+ if (retried) {
ip = ntohl(h->next.ip);
- while (ip <= ip_to) {
+ p = ntohs(h->next.port);
+ } else {
+ p = port;
+ }
+ do {
e.ip = htonl(ip);
- last = ip_set_range_to_cidr(ip, ip_to, &cidr);
+ ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
e.cidr = cidr - 1;
- p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
- : port;
for (; p <= port_to; p++) {
e.port = htons(p);
ret = adtfn(set, &e, &ext, &ext, flags);
-
if (ret && !ip_set_eexist(ret, flags))
return ret;
ret = 0;
}
- ip = last + 1;
- }
+ p = port;
+ } while (ip++ < ip_to);
return ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 8602f2595a1a..d391485a6acd 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -184,8 +184,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
+ u32 ip = 0, ip_to = 0, p = 0, port, port_to;
+ u32 ip2_from = 0, ip2_to = 0, ip2;
bool with_ports = false;
int ret;
@@ -288,33 +288,34 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
- if (retried)
+ if (retried) {
ip = ntohl(h->next.ip[0]);
+ p = ntohs(h->next.port);
+ ip2 = ntohl(h->next.ip[1]);
+ } else {
+ p = port;
+ ip2 = ip2_from;
+ }
- while (ip <= ip_to) {
+ do {
e.ip[0] = htonl(ip);
- ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
- p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port)
- : port;
+ ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
for (; p <= port_to; p++) {
e.port = htons(p);
- ip2 = (retried && ip == ntohl(h->next.ip[0]) &&
- p == ntohs(h->next.port)) ? ntohl(h->next.ip[1])
- : ip2_from;
- while (ip2 <= ip2_to) {
+ do {
e.ip[1] = htonl(ip2);
- ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
- &e.cidr[1]);
+ ip2 = ip_set_range_to_cidr(ip2, ip2_to,
+ &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
ret = 0;
- ip2 = ip2_last + 1;
- }
+ } while (ip2++ < ip2_to);
+ ip2 = ip2_from;
}
- ip = ip_last + 1;
- }
+ p = port;
+ } while (ip++ < ip_to);
return ret;
}
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index e864681b8dc5..072a658fde04 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -55,8 +55,9 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)
{
struct list_set *map = set->data;
+ struct ip_set_ext *mext = &opt->ext;
struct set_elem *e;
- u32 cmdflags = opt->cmdflags;
+ u32 flags = opt->cmdflags;
int ret;
/* Don't lookup sub-counters at all */
@@ -64,21 +65,11 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
list_for_each_entry_rcu(e, &map->members, list) {
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set)))
- continue;
ret = ip_set_test(e->id, skb, par, opt);
- if (ret > 0) {
- if (SET_WITH_COUNTER(set))
- ip_set_update_counter(ext_counter(e, set),
- ext, &opt->ext,
- cmdflags);
- if (SET_WITH_SKBINFO(set))
- ip_set_get_skbinfo(ext_skbinfo(e, set),
- ext, &opt->ext,
- cmdflags);
- return ret;
- }
+ if (ret <= 0)
+ continue;
+ if (ip_set_match_extensions(set, ext, mext, flags, e))
+ return 1;
}
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 299edc6add5a..1c98c907bc63 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -595,7 +595,6 @@ static int ip_vs_app_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip_vs_app_fops = {
- .owner = THIS_MODULE,
.open = ip_vs_app_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 3e053cb30070..370abbf6f421 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -322,7 +322,7 @@ ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs,
{
__be16 _ports[2], *pptr;
- pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
+ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports);
if (pptr == NULL)
return 1;
@@ -1143,7 +1143,6 @@ static int ip_vs_conn_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip_vs_conn_fops = {
- .owner = THIS_MODULE,
.open = ip_vs_conn_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1221,7 +1220,6 @@ static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip_vs_conn_sync_fops = {
- .owner = THIS_MODULE,
.open = ip_vs_conn_sync_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5cb7cac9177d..5f6f73cf2174 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -433,7 +433,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* IPv6 frags, only the first hit here.
*/
- pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
+ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports);
if (pptr == NULL)
return NULL;
@@ -566,7 +566,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct netns_ipvs *ipvs = svc->ipvs;
struct net *net = ipvs->net;
- pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
+ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports);
if (!pptr)
return NF_DROP;
dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0];
@@ -982,7 +982,7 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
unsigned int offset;
*related = 1;
- ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
+ ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph);
if (ic == NULL)
return NF_DROP;
@@ -1214,7 +1214,7 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
return NULL;
pptr = frag_safe_skb_hp(skb, iph->len,
- sizeof(_ports), _ports, iph);
+ sizeof(_ports), _ports);
if (!pptr)
return NULL;
@@ -1407,7 +1407,7 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
__be16 _ports[2], *pptr;
pptr = frag_safe_skb_hp(skb, iph.len,
- sizeof(_ports), _ports, &iph);
+ sizeof(_ports), _ports);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr,
@@ -1741,7 +1741,7 @@ static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
*related = 1;
- ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph);
+ ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph);
if (ic == NULL)
return NF_DROP;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index fff213eacf2a..5ebde4b15810 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2116,7 +2116,6 @@ static int ip_vs_info_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip_vs_info_fops = {
- .owner = THIS_MODULE,
.open = ip_vs_info_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -2161,7 +2160,6 @@ static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip_vs_stats_fops = {
- .owner = THIS_MODULE,
.open = ip_vs_stats_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -2230,7 +2228,6 @@ static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip_vs_stats_percpu_fops = {
- .owner = THIS_MODULE,
.open = ip_vs_stats_percpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 121a321b91be..bcd9b7bde4ee 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -315,6 +315,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
switch (skb->ip_summed) {
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+ /* fall through */
case CHECKSUM_COMPLETE:
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 30e11cd6aa8a..c15ef7c2a1fa 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -319,6 +319,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, udphoff,
skb->len - udphoff, 0);
+ /* fall through */
case CHECKSUM_COMPLETE:
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 9ee71cb276d7..fbaf3bd05b2e 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1636,17 +1636,14 @@ static int
ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
{
struct msghdr msg = {NULL,};
- struct kvec iov;
+ struct kvec iov = {buffer, buflen};
int len;
EnterFunction(7);
/* Receive a packet */
- iov.iov_base = buffer;
- iov.iov_len = (size_t)buflen;
-
- len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
-
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen);
+ len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
if (len < 0)
return len;
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
new file mode 100644
index 000000000000..6d65389e308f
--- /dev/null
+++ b/net/netfilter/nf_conncount.c
@@ -0,0 +1,373 @@
+/*
+ * count the number of connections matching an arbitrary key.
+ *
+ * (C) 2017 Red Hat GmbH
+ * Author: Florian Westphal <fw@strlen.de>
+ *
+ * split from xt_connlimit.c:
+ * (c) 2000 Gerd Knorr <kraxel@bytesex.org>
+ * Nov 2002: Martin Bene <martin.bene@icomedias.com>:
+ * only ignore TIME_WAIT or gone connections
+ * (C) CC Computer Consultants GmbH, 2007
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_count.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+#define CONNCOUNT_SLOTS 256U
+
+#ifdef CONFIG_LOCKDEP
+#define CONNCOUNT_LOCK_SLOTS 8U
+#else
+#define CONNCOUNT_LOCK_SLOTS 256U
+#endif
+
+#define CONNCOUNT_GC_MAX_NODES 8
+#define MAX_KEYLEN 5
+
+/* we will save the tuples of all connections we care about */
+struct nf_conncount_tuple {
+ struct hlist_node node;
+ struct nf_conntrack_tuple tuple;
+};
+
+struct nf_conncount_rb {
+ struct rb_node node;
+ struct hlist_head hhead; /* connections/hosts in same subnet */
+ u32 key[MAX_KEYLEN];
+};
+
+static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
+
+struct nf_conncount_data {
+ unsigned int keylen;
+ struct rb_root root[CONNCOUNT_SLOTS];
+};
+
+static u_int32_t conncount_rnd __read_mostly;
+static struct kmem_cache *conncount_rb_cachep __read_mostly;
+static struct kmem_cache *conncount_conn_cachep __read_mostly;
+
+static inline bool already_closed(const struct nf_conn *conn)
+{
+ if (nf_ct_protonum(conn) == IPPROTO_TCP)
+ return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
+ conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
+ else
+ return false;
+}
+
+static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
+{
+ return memcmp(a, b, klen * sizeof(u32));
+}
+
+static bool add_hlist(struct hlist_head *head,
+ const struct nf_conntrack_tuple *tuple)
+{
+ struct nf_conncount_tuple *conn;
+
+ conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL)
+ return false;
+ conn->tuple = *tuple;
+ hlist_add_head(&conn->node, head);
+ return true;
+}
+
+static unsigned int check_hlist(struct net *net,
+ struct hlist_head *head,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone,
+ bool *addit)
+{
+ const struct nf_conntrack_tuple_hash *found;
+ struct nf_conncount_tuple *conn;
+ struct hlist_node *n;
+ struct nf_conn *found_ct;
+ unsigned int length = 0;
+
+ *addit = true;
+
+ /* check the saved connections */
+ hlist_for_each_entry_safe(conn, n, head, node) {
+ found = nf_conntrack_find_get(net, zone, &conn->tuple);
+ if (found == NULL) {
+ hlist_del(&conn->node);
+ kmem_cache_free(conncount_conn_cachep, conn);
+ continue;
+ }
+
+ found_ct = nf_ct_tuplehash_to_ctrack(found);
+
+ if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
+ /*
+ * Just to be sure we have it only once in the list.
+ * We should not see tuples twice unless someone hooks
+ * this into a table without "-p tcp --syn".
+ */
+ *addit = false;
+ } else if (already_closed(found_ct)) {
+ /*
+ * we do not care about connections which are
+ * closed already -> ditch it
+ */
+ nf_ct_put(found_ct);
+ hlist_del(&conn->node);
+ kmem_cache_free(conncount_conn_cachep, conn);
+ continue;
+ }
+
+ nf_ct_put(found_ct);
+ length++;
+ }
+
+ return length;
+}
+
+static void tree_nodes_free(struct rb_root *root,
+ struct nf_conncount_rb *gc_nodes[],
+ unsigned int gc_count)
+{
+ struct nf_conncount_rb *rbconn;
+
+ while (gc_count) {
+ rbconn = gc_nodes[--gc_count];
+ rb_erase(&rbconn->node, root);
+ kmem_cache_free(conncount_rb_cachep, rbconn);
+ }
+}
+
+static unsigned int
+count_tree(struct net *net, struct rb_root *root,
+ const u32 *key, u8 keylen,
+ u8 family,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
+ struct rb_node **rbnode, *parent;
+ struct nf_conncount_rb *rbconn;
+ struct nf_conncount_tuple *conn;
+ unsigned int gc_count;
+ bool no_gc = false;
+
+ restart:
+ gc_count = 0;
+ parent = NULL;
+ rbnode = &(root->rb_node);
+ while (*rbnode) {
+ int diff;
+ bool addit;
+
+ rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
+
+ parent = *rbnode;
+ diff = key_diff(key, rbconn->key, keylen);
+ if (diff < 0) {
+ rbnode = &((*rbnode)->rb_left);
+ } else if (diff > 0) {
+ rbnode = &((*rbnode)->rb_right);
+ } else {
+ /* same source network -> be counted! */
+ unsigned int count;
+ count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
+
+ tree_nodes_free(root, gc_nodes, gc_count);
+ if (!addit)
+ return count;
+
+ if (!add_hlist(&rbconn->hhead, tuple))
+ return 0; /* hotdrop */
+
+ return count + 1;
+ }
+
+ if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
+ continue;
+
+ /* only used for GC on hhead, retval and 'addit' ignored */
+ check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
+ if (hlist_empty(&rbconn->hhead))
+ gc_nodes[gc_count++] = rbconn;
+ }
+
+ if (gc_count) {
+ no_gc = true;
+ tree_nodes_free(root, gc_nodes, gc_count);
+ /* tree_node_free before new allocation permits
+ * allocator to re-use newly free'd object.
+ *
+ * This is a rare event; in most cases we will find
+ * existing node to re-use. (or gc_count is 0).
+ */
+ goto restart;
+ }
+
+ /* no match, need to insert new node */
+ rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
+ if (rbconn == NULL)
+ return 0;
+
+ conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL) {
+ kmem_cache_free(conncount_rb_cachep, rbconn);
+ return 0;
+ }
+
+ conn->tuple = *tuple;
+ memcpy(rbconn->key, key, sizeof(u32) * keylen);
+
+ INIT_HLIST_HEAD(&rbconn->hhead);
+ hlist_add_head(&conn->node, &rbconn->hhead);
+
+ rb_link_node(&rbconn->node, parent, rbnode);
+ rb_insert_color(&rbconn->node, root);
+ return 1;
+}
+
+unsigned int nf_conncount_count(struct net *net,
+ struct nf_conncount_data *data,
+ const u32 *key,
+ unsigned int family,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ struct rb_root *root;
+ int count;
+ u32 hash;
+
+ hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
+ root = &data->root[hash];
+
+ spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+
+ count = count_tree(net, root, key, data->keylen, family, tuple, zone);
+
+ spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+
+ return count;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_count);
+
+struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
+ unsigned int keylen)
+{
+ struct nf_conncount_data *data;
+ int ret, i;
+
+ if (keylen % sizeof(u32) ||
+ keylen / sizeof(u32) > MAX_KEYLEN ||
+ keylen == 0)
+ return ERR_PTR(-EINVAL);
+
+ net_get_random_once(&conncount_rnd, sizeof(conncount_rnd));
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ ret = nf_ct_netns_get(net, family);
+ if (ret < 0) {
+ kfree(data);
+ return ERR_PTR(ret);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(data->root); ++i)
+ data->root[i] = RB_ROOT;
+
+ data->keylen = keylen / sizeof(u32);
+
+ return data;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_init);
+
+static void destroy_tree(struct rb_root *r)
+{
+ struct nf_conncount_tuple *conn;
+ struct nf_conncount_rb *rbconn;
+ struct hlist_node *n;
+ struct rb_node *node;
+
+ while ((node = rb_first(r)) != NULL) {
+ rbconn = rb_entry(node, struct nf_conncount_rb, node);
+
+ rb_erase(node, r);
+
+ hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
+ kmem_cache_free(conncount_conn_cachep, conn);
+
+ kmem_cache_free(conncount_rb_cachep, rbconn);
+ }
+}
+
+void nf_conncount_destroy(struct net *net, unsigned int family,
+ struct nf_conncount_data *data)
+{
+ unsigned int i;
+
+ nf_ct_netns_put(net, family);
+
+ for (i = 0; i < ARRAY_SIZE(data->root); ++i)
+ destroy_tree(&data->root[i]);
+
+ kfree(data);
+}
+EXPORT_SYMBOL_GPL(nf_conncount_destroy);
+
+static int __init nf_conncount_modinit(void)
+{
+ int i;
+
+ BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS);
+ BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0);
+
+ for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i)
+ spin_lock_init(&nf_conncount_locks[i]);
+
+ conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
+ sizeof(struct nf_conncount_tuple),
+ 0, 0, NULL);
+ if (!conncount_conn_cachep)
+ return -ENOMEM;
+
+ conncount_rb_cachep = kmem_cache_create("nf_conncount_rb",
+ sizeof(struct nf_conncount_rb),
+ 0, 0, NULL);
+ if (!conncount_rb_cachep) {
+ kmem_cache_destroy(conncount_conn_cachep);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void __exit nf_conncount_modexit(void)
+{
+ kmem_cache_destroy(conncount_conn_cachep);
+ kmem_cache_destroy(conncount_rb_cachep);
+}
+
+module_init(nf_conncount_modinit);
+module_exit(nf_conncount_modexit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("netfilter: count number of connections matching a key");
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 85f643c1e227..705198de671d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -58,8 +58,6 @@
#include "nf_internals.h"
-#define NF_CONNTRACK_VERSION "0.5.0"
-
int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
enum nf_nat_manip_type manip,
const struct nlattr *attr) __read_mostly;
@@ -901,6 +899,9 @@ static unsigned int early_drop_list(struct net *net,
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
+ continue;
+
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
continue;
@@ -975,6 +976,18 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
return false;
}
+#define DAY (86400 * HZ)
+
+/* Set an arbitrary timeout large enough not to ever expire, this save
+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
+ * nf_ct_is_expired().
+ */
+static void nf_ct_offload_timeout(struct nf_conn *ct)
+{
+ if (nf_ct_expires(ct) < DAY / 2)
+ ct->timeout = nfct_time_stamp + DAY;
+}
+
static void gc_worker(struct work_struct *work)
{
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
@@ -1011,6 +1024,11 @@ static void gc_worker(struct work_struct *work)
tmp = nf_ct_tuplehash_to_ctrack(h);
scanned++;
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
+ nf_ct_offload_timeout(tmp);
+ continue;
+ }
+
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
expired_count++;
@@ -1044,7 +1062,7 @@ static void gc_worker(struct work_struct *work)
* we will just continue with next hash slot.
*/
rcu_read_unlock();
- cond_resched_rcu_qs();
+ cond_resched();
} while (++buckets < goal);
if (gc_work->exiting)
@@ -2048,10 +2066,6 @@ int nf_conntrack_init_start(void)
if (!nf_conntrack_cachep)
goto err_cachep;
- printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
- NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
- nf_conntrack_max);
-
ret = nf_conntrack_expect_init();
if (ret < 0)
goto err_expect;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index d6748a8a79c5..8ef21d9f9a00 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -649,7 +649,6 @@ static int exp_open(struct inode *inode, struct file *file)
}
static const struct file_operations exp_file_ops = {
- .owner = THIS_MODULE,
.open = exp_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index cf1bf2605c10..1601275efe2d 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -1,4 +1,4 @@
-/****************************************************************************
+/*
* ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323
* conntrack/NAT module.
*
@@ -8,7 +8,7 @@
*
* See ip_conntrack_helper_h323_asn1.h for details.
*
- ****************************************************************************/
+ */
#ifdef __KERNEL__
#include <linux/kernel.h>
@@ -103,7 +103,6 @@ struct bitstr {
#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
-#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
static unsigned int get_len(struct bitstr *bs);
static unsigned int get_bit(struct bitstr *bs);
static unsigned int get_bits(struct bitstr *bs, unsigned int b);
@@ -141,14 +140,15 @@ static const decoder_t Decoders[] = {
decode_choice,
};
-/****************************************************************************
+/*
* H.323 Types
- ****************************************************************************/
+ */
#include "nf_conntrack_h323_types.c"
-/****************************************************************************
+/*
* Functions
- ****************************************************************************/
+ */
+
/* Assume bs is aligned && v < 16384 */
static unsigned int get_len(struct bitstr *bs)
{
@@ -165,7 +165,19 @@ static unsigned int get_len(struct bitstr *bs)
return v;
}
-/****************************************************************************/
+static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits)
+{
+ bits += bs->bit;
+ bytes += bits / BITS_PER_BYTE;
+ if (bits % BITS_PER_BYTE > 0)
+ bytes++;
+
+ if (*bs->cur + bytes > *bs->end)
+ return 1;
+
+ return 0;
+}
+
static unsigned int get_bit(struct bitstr *bs)
{
unsigned int b = (*bs->cur) & (0x80 >> bs->bit);
@@ -175,7 +187,6 @@ static unsigned int get_bit(struct bitstr *bs)
return b;
}
-/****************************************************************************/
/* Assume b <= 8 */
static unsigned int get_bits(struct bitstr *bs, unsigned int b)
{
@@ -201,7 +212,6 @@ static unsigned int get_bits(struct bitstr *bs, unsigned int b)
return v;
}
-/****************************************************************************/
/* Assume b <= 32 */
static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
{
@@ -239,9 +249,9 @@ static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
return v;
}
-/****************************************************************************
+/*
* Assume bs is aligned and sizeof(unsigned int) == 4
- ****************************************************************************/
+ */
static unsigned int get_uint(struct bitstr *bs, int b)
{
unsigned int v = 0;
@@ -250,12 +260,15 @@ static unsigned int get_uint(struct bitstr *bs, int b)
case 4:
v |= *bs->cur++;
v <<= 8;
+ /* fall through */
case 3:
v |= *bs->cur++;
v <<= 8;
+ /* fall through */
case 2:
v |= *bs->cur++;
v <<= 8;
+ /* fall through */
case 1:
v |= *bs->cur++;
break;
@@ -263,7 +276,6 @@ static unsigned int get_uint(struct bitstr *bs, int b)
return v;
}
-/****************************************************************************/
static int decode_nul(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -272,19 +284,17 @@ static int decode_nul(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_bool(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
INC_BIT(bs);
-
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_oid(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -293,15 +303,17 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f,
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 1);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
+
len = *bs->cur++;
bs->cur += len;
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
- CHECK_BOUND(bs, 0);
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_int(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -319,6 +331,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
bs->cur += 2;
break;
case CONS: /* 64K < Range < 4G */
+ if (nf_h323_error_boundary(bs, 0, 2))
+ return H323_ERROR_BOUND;
len = get_bits(bs, 2) + 1;
BYTE_ALIGN(bs);
if (base && (f->attr & DECODE)) { /* timeToLive */
@@ -330,7 +344,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
break;
case UNCO:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
bs->cur += len;
break;
@@ -341,11 +356,11 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
PRINT("\n");
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_enum(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -357,11 +372,11 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f,
INC_BITS(bs, f->sz);
}
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -375,12 +390,14 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
len = f->lb;
break;
case WORD: /* 2-byte length */
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = (*bs->cur++) << 8;
len += (*bs->cur++) + f->lb;
break;
case SEMI:
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
break;
default:
@@ -391,11 +408,11 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
bs->cur += len >> 3;
bs->bit = len & 7;
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_numstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -404,16 +421,18 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f,
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
/* 2 <= Range <= 255 */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
len = get_bits(bs, f->sz) + f->lb;
BYTE_ALIGN(bs);
INC_BITS(bs, (len << 2));
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_octstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -440,15 +459,19 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
break;
case BYTE: /* Range == 256 */
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 1);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
len = (*bs->cur++) + f->lb;
break;
case SEMI:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs) + f->lb;
break;
default: /* 2 <= Range <= 255 */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
len = get_bits(bs, f->sz) + f->lb;
BYTE_ALIGN(bs);
break;
@@ -458,11 +481,11 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
PRINT("\n");
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -473,10 +496,13 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
switch (f->sz) {
case BYTE: /* Range == 256 */
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 1);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
len = (*bs->cur++) + f->lb;
break;
default: /* 2 <= Range <= 255 */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
len = get_bits(bs, f->sz) + f->lb;
BYTE_ALIGN(bs);
break;
@@ -484,11 +510,11 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
bs->cur += len << 1;
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_seq(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -503,9 +529,13 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
/* Extensible? */
+ if (nf_h323_error_boundary(bs, 0, 1))
+ return H323_ERROR_BOUND;
ext = (f->attr & EXT) ? get_bit(bs) : 0;
/* Get fields bitmap */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
bmp = get_bitmap(bs, f->sz);
if (base)
*(unsigned int *)base = bmp;
@@ -525,9 +555,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
/* Decode */
if (son->attr & OPEN) { /* Open field */
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
if (!base || !(son->attr & DECODE)) {
PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
" ", son->name);
@@ -555,8 +587,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
/* Get the extension bitmap */
+ if (nf_h323_error_boundary(bs, 0, 7))
+ return H323_ERROR_BOUND;
bmp2_len = get_bits(bs, 7) + 1;
- CHECK_BOUND(bs, (bmp2_len + 7) >> 3);
+ if (nf_h323_error_boundary(bs, 0, bmp2_len))
+ return H323_ERROR_BOUND;
bmp2 = get_bitmap(bs, bmp2_len);
bmp |= bmp2 >> f->sz;
if (base)
@@ -567,9 +602,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
for (opt = 0; opt < bmp2_len; opt++, i++, son++) {
/* Check Range */
if (i >= f->ub) { /* Newer Version? */
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
bs->cur += len;
continue;
}
@@ -583,9 +620,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
if (!((0x80000000 >> opt) & bmp2)) /* Not present */
continue;
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
if (!base || !(son->attr & DECODE)) {
PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
son->name);
@@ -605,7 +644,6 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_seqof(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -623,22 +661,27 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
switch (f->sz) {
case BYTE:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 1);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
count = *bs->cur++;
break;
case WORD:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
count = *bs->cur++;
count <<= 8;
count += *bs->cur++;
break;
case SEMI:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
count = get_len(bs);
break;
default:
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
count = get_bits(bs, f->sz);
break;
}
@@ -658,8 +701,11 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
for (i = 0; i < count; i++) {
if (son->attr & OPEN) {
BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
if (!base || !(son->attr & DECODE)) {
PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
" ", son->name);
@@ -694,8 +740,6 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-
-/****************************************************************************/
static int decode_choice(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -710,11 +754,17 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
/* Decode the choice index number */
+ if (nf_h323_error_boundary(bs, 0, 1))
+ return H323_ERROR_BOUND;
if ((f->attr & EXT) && get_bit(bs)) {
ext = 1;
+ if (nf_h323_error_boundary(bs, 0, 7))
+ return H323_ERROR_BOUND;
type = get_bits(bs, 7) + f->lb;
} else {
ext = 0;
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
type = get_bits(bs, f->sz);
if (type >= f->lb)
return H323_ERROR_RANGE;
@@ -727,8 +777,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
/* Check Range */
if (type >= f->ub) { /* Newer version? */
BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
bs->cur += len;
return H323_ERROR_NONE;
}
@@ -742,8 +795,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
if (ext || (son->attr & OPEN)) {
BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
if (!base || !(son->attr & DECODE)) {
PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
son->name);
@@ -765,7 +821,6 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
{
static const struct field_t ras_message = {
@@ -781,7 +836,6 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
return decode_choice(&bs, &ras_message, (char *) ras, 0);
}
-/****************************************************************************/
static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
size_t sz, H323_UserInformation *uuie)
{
@@ -799,7 +853,6 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
return decode_seq(&bs, &h323_userinformation, (char *) uuie, 0);
}
-/****************************************************************************/
int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
MultimediaSystemControlMessage *
mscm)
@@ -818,7 +871,6 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
(char *) mscm, 0);
}
-/****************************************************************************/
int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
{
unsigned char *p = buf;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index f71f0d2558fd..005589c6d0f6 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -24,6 +24,7 @@
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/ip6_route.h>
+#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -115,7 +116,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245;
static struct nf_conntrack_helper nf_conntrack_helper_q931[];
static struct nf_conntrack_helper nf_conntrack_helper_ras[];
-/****************************************************************************/
static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo,
unsigned char **data, int *datalen, int *dataoff)
@@ -219,7 +219,6 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
return 0;
}
-/****************************************************************************/
static int get_h245_addr(struct nf_conn *ct, const unsigned char *data,
H245_TransportAddress *taddr,
union nf_inet_addr *addr, __be16 *port)
@@ -254,7 +253,6 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data,
return 1;
}
-/****************************************************************************/
static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -328,7 +326,6 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
return ret;
}
-/****************************************************************************/
static int expect_t120(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -380,7 +377,6 @@ static int expect_t120(struct sk_buff *skb,
return ret;
}
-/****************************************************************************/
static int process_h245_channel(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -410,7 +406,6 @@ static int process_h245_channel(struct sk_buff *skb,
return 0;
}
-/****************************************************************************/
static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -472,7 +467,6 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff, unsigned char **data, int dataoff,
@@ -542,7 +536,6 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff, unsigned char **data, int dataoff,
@@ -578,7 +571,6 @@ static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int h245_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
@@ -628,7 +620,6 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff,
return NF_DROP;
}
-/****************************************************************************/
static const struct nf_conntrack_expect_policy h245_exp_policy = {
.max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */,
.timeout = 240,
@@ -643,7 +634,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
.expect_policy = &h245_exp_policy,
};
-/****************************************************************************/
int get_h225_addr(struct nf_conn *ct, unsigned char *data,
TransportAddress *taddr,
union nf_inet_addr *addr, __be16 *port)
@@ -675,7 +665,6 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data,
return 1;
}
-/****************************************************************************/
static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff, unsigned char **data, int dataoff,
@@ -726,20 +715,15 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
}
/* If the calling party is on the same side of the forward-to party,
- * we don't need to track the second call */
+ * we don't need to track the second call
+ */
static int callforward_do_filter(struct net *net,
const union nf_inet_addr *src,
const union nf_inet_addr *dst,
u_int8_t family)
{
- const struct nf_afinfo *afinfo;
int ret = 0;
- /* rcu_read_lock()ed by nf_hook_thresh */
- afinfo = nf_get_afinfo(family);
- if (!afinfo)
- return 0;
-
switch (family) {
case AF_INET: {
struct flowi4 fl1, fl2;
@@ -750,10 +734,10 @@ static int callforward_do_filter(struct net *net,
memset(&fl2, 0, sizeof(fl2));
fl2.daddr = dst->ip;
- if (!afinfo->route(net, (struct dst_entry **)&rt1,
- flowi4_to_flowi(&fl1), false)) {
- if (!afinfo->route(net, (struct dst_entry **)&rt2,
- flowi4_to_flowi(&fl2), false)) {
+ if (!nf_ip_route(net, (struct dst_entry **)&rt1,
+ flowi4_to_flowi(&fl1), false)) {
+ if (!nf_ip_route(net, (struct dst_entry **)&rt2,
+ flowi4_to_flowi(&fl2), false)) {
if (rt_nexthop(rt1, fl1.daddr) ==
rt_nexthop(rt2, fl2.daddr) &&
rt1->dst.dev == rt2->dst.dev)
@@ -766,18 +750,23 @@ static int callforward_do_filter(struct net *net,
}
#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
case AF_INET6: {
- struct flowi6 fl1, fl2;
+ const struct nf_ipv6_ops *v6ops;
struct rt6_info *rt1, *rt2;
+ struct flowi6 fl1, fl2;
+
+ v6ops = nf_get_ipv6_ops();
+ if (!v6ops)
+ return 0;
memset(&fl1, 0, sizeof(fl1));
fl1.daddr = src->in6;
memset(&fl2, 0, sizeof(fl2));
fl2.daddr = dst->in6;
- if (!afinfo->route(net, (struct dst_entry **)&rt1,
- flowi6_to_flowi(&fl1), false)) {
- if (!afinfo->route(net, (struct dst_entry **)&rt2,
- flowi6_to_flowi(&fl2), false)) {
+ if (!v6ops->route(net, (struct dst_entry **)&rt1,
+ flowi6_to_flowi(&fl1), false)) {
+ if (!v6ops->route(net, (struct dst_entry **)&rt2,
+ flowi6_to_flowi(&fl2), false)) {
if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr),
rt6_nexthop(rt2, &fl2.daddr)) &&
rt1->dst.dev == rt2->dst.dev)
@@ -794,7 +783,6 @@ static int callforward_do_filter(struct net *net,
}
-/****************************************************************************/
static int expect_callforwarding(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -815,7 +803,8 @@ static int expect_callforwarding(struct sk_buff *skb,
return 0;
/* If the calling party is on the same side of the forward-to party,
- * we don't need to track the second call */
+ * we don't need to track the second call
+ */
if (callforward_filter &&
callforward_do_filter(net, &addr, &ct->tuplehash[!dir].tuple.src.u3,
nf_ct_l3num(ct))) {
@@ -854,7 +843,6 @@ static int expect_callforwarding(struct sk_buff *skb,
return ret;
}
-/****************************************************************************/
static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -925,7 +913,6 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_callproceeding(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -958,7 +945,6 @@ static int process_callproceeding(struct sk_buff *skb,
return 0;
}
-/****************************************************************************/
static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -990,7 +976,6 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1022,7 +1007,6 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1063,7 +1047,6 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1095,7 +1078,6 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff, unsigned char **data, int dataoff,
@@ -1154,7 +1136,6 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int q931_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
@@ -1203,7 +1184,6 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff,
return NF_DROP;
}
-/****************************************************************************/
static const struct nf_conntrack_expect_policy q931_exp_policy = {
/* T.120 and H.245 */
.max_expected = H323_RTP_CHANNEL_MAX * 4 + 4,
@@ -1231,7 +1211,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
},
};
-/****************************************************************************/
static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
int *datalen)
{
@@ -1249,7 +1228,6 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
return skb_header_pointer(skb, dataoff, *datalen, h323_buffer);
}
-/****************************************************************************/
static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
union nf_inet_addr *addr,
__be16 port)
@@ -1270,7 +1248,6 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
return NULL;
}
-/****************************************************************************/
static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff, unsigned char **data,
@@ -1328,7 +1305,6 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
return ret;
}
-/****************************************************************************/
static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1346,7 +1322,6 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1391,7 +1366,6 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
return ret;
}
-/****************************************************************************/
static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1428,7 +1402,6 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1480,7 +1453,6 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1514,7 +1486,6 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1559,7 +1530,6 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1608,7 +1578,6 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
return ret;
}
-/****************************************************************************/
static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1626,7 +1595,6 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1666,7 +1634,6 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
return ret;
}
-/****************************************************************************/
static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1700,7 +1667,6 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1745,7 +1711,6 @@ static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int ras_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
@@ -1788,7 +1753,6 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff,
return NF_DROP;
}
-/****************************************************************************/
static const struct nf_conntrack_expect_policy ras_exp_policy = {
.max_expected = 32,
.timeout = 240,
@@ -1849,7 +1813,6 @@ static void __exit h323_helper_exit(void)
nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
}
-/****************************************************************************/
static void __exit nf_conntrack_h323_fini(void)
{
h323_helper_exit();
@@ -1857,7 +1820,6 @@ static void __exit nf_conntrack_h323_fini(void)
pr_debug("nf_ct_h323: fini\n");
}
-/****************************************************************************/
static int __init nf_conntrack_h323_init(void)
{
int ret;
@@ -1877,7 +1839,6 @@ err1:
return ret;
}
-/****************************************************************************/
module_init(nf_conntrack_h323_init);
module_exit(nf_conntrack_h323_fini);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 59c08997bfdf..dd177ebee9aa 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,7 +45,6 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <net/netfilter/nf_conntrack_labels.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_core.h>
@@ -58,8 +57,6 @@
MODULE_LICENSE("GPL");
-static char __initdata version[] = "0.93";
-
static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l4proto *l4proto)
@@ -545,7 +542,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
len *= 3u; /* ORIG, REPLY, MASTER */
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
- len += l4proto->nla_size;
+ len += l4proto->nlattr_size;
if (l4proto->nlattr_tuple_size) {
len4 = l4proto->nlattr_tuple_size();
len4 *= 3u; /* ORIG, REPLY, MASTER */
@@ -1111,6 +1108,14 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
.len = NF_CT_LABELS_MAX_SIZE },
};
+static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
+{
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ return 0;
+
+ return ctnetlink_filter_match(ct, data);
+}
+
static int ctnetlink_flush_conntrack(struct net *net,
const struct nlattr * const cda[],
u32 portid, int report)
@@ -1123,7 +1128,7 @@ static int ctnetlink_flush_conntrack(struct net *net,
return PTR_ERR(filter);
}
- nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter,
+ nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter,
portid, report);
kfree(filter);
@@ -1169,6 +1174,11 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
ct = nf_ct_tuplehash_to_ctrack(h);
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
+ nf_ct_put(ct);
+ return -EBUSY;
+ }
+
if (cda[CTA_ID]) {
u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
if (id != (u32)(unsigned long)ct) {
@@ -1566,9 +1576,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
static int ctnetlink_change_timeout(struct nf_conn *ct,
const struct nlattr * const cda[])
{
- u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
+ u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
- ct->timeout = nfct_time_stamp + timeout * HZ;
+ if (timeout > INT_MAX)
+ timeout = INT_MAX;
+ ct->timeout = nfct_time_stamp + (u32)timeout;
if (test_bit(IPS_DYING_BIT, &ct->status))
return -ETIME;
@@ -1768,6 +1780,7 @@ ctnetlink_create_conntrack(struct net *net,
int err = -EINVAL;
struct nf_conntrack_helper *helper;
struct nf_conn_tstamp *tstamp;
+ u64 timeout;
ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
if (IS_ERR(ct))
@@ -1776,7 +1789,10 @@ ctnetlink_create_conntrack(struct net *net,
if (!cda[CTA_TIMEOUT])
goto err1;
- ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
+ timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
+ if (timeout > INT_MAX)
+ timeout = INT_MAX;
+ ct->timeout = (u32)timeout + nfct_time_stamp;
rcu_read_lock();
if (cda[CTA_HELP]) {
@@ -3407,7 +3423,6 @@ static int __init ctnetlink_init(void)
{
int ret;
- pr_info("ctnetlink v%s: registering with nfnetlink.\n", version);
ret = nfnetlink_subsys_register(&ctnl_subsys);
if (ret < 0) {
pr_err("ctnetlink_init: cannot register with nfnetlink.\n");
@@ -3441,8 +3456,6 @@ err_out:
static void __exit ctnetlink_exit(void)
{
- pr_info("ctnetlink: unregistering from nfnetlink.\n");
-
unregister_pernet_subsys(&ctnetlink_net_ops);
nfnetlink_subsys_unregister(&ctnl_exp_subsys);
nfnetlink_subsys_unregister(&ctnl_subsys);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index c8e9c9503a08..afdeca53e88b 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -385,14 +385,14 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net,
/* FIXME: Allow NULL functions and sub in pointers to generic for
them. --RR */
-int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
+int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos))
return -EBUSY;
- if ((l4proto->to_nlattr && !l4proto->nlattr_size) ||
+ if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) ||
(l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
return -EINVAL;
@@ -428,10 +428,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
goto out_unlock;
}
- l4proto->nla_size = 0;
- if (l4proto->nlattr_size)
- l4proto->nla_size += l4proto->nlattr_size();
-
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
l4proto);
out_unlock:
@@ -502,7 +498,7 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
-int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
+int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
unsigned int num_proto)
{
int ret = -EINVAL, ver;
@@ -524,7 +520,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *const l4proto[],
+ const struct nf_conntrack_l4proto *const l4proto[],
unsigned int num_proto)
{
int ret = -EINVAL;
@@ -545,7 +541,7 @@ int nf_ct_l4proto_pernet_register(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
-void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
+void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
unsigned int num_proto)
{
mutex_lock(&nf_ct_proto_mutex);
@@ -555,12 +551,12 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
synchronize_net();
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_destroy(kill_l4proto, l4proto);
+ nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
void nf_ct_l4proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l4proto *const l4proto[],
+ const struct nf_conntrack_l4proto *const l4proto[],
unsigned int num_proto)
{
while (num_proto-- != 0)
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 2a446f4a554c..abe647d5b8c6 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -654,6 +654,12 @@ static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
[CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC },
};
+#define DCCP_NLATTR_SIZE ( \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \
+ NLA_ALIGN(NLA_HDRLEN + 0))
+
static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
{
struct nlattr *attr = cda[CTA_PROTOINFO_DCCP];
@@ -691,13 +697,6 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
spin_unlock_bh(&ct->lock);
return 0;
}
-
-static int dccp_nlattr_size(void)
-{
- return nla_total_size(0) /* CTA_PROTOINFO_DCCP */
- + nla_policy_len(dccp_nla_policy, CTA_PROTOINFO_DCCP_MAX + 1);
-}
-
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
@@ -862,7 +861,7 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.dccp.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
.pkt_to_tuple = dccp_pkt_to_tuple,
@@ -876,8 +875,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.print_conntrack = dccp_print_conntrack,
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_size = DCCP_NLATTR_SIZE,
.to_nlattr = dccp_to_nlattr,
- .nlattr_size = dccp_nlattr_size,
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
@@ -898,7 +897,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
-struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
.l3proto = AF_INET6,
.l4proto = IPPROTO_DCCP,
.pkt_to_tuple = dccp_pkt_to_tuple,
@@ -912,8 +911,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.print_conntrack = dccp_print_conntrack,
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_size = DCCP_NLATTR_SIZE,
.to_nlattr = dccp_to_nlattr,
- .nlattr_size = dccp_nlattr_size,
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 1f86ddf6649a..6c6896d21cd7 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
-static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
+static const unsigned int nf_ct_generic_timeout = 600*HZ;
static bool nf_generic_should_process(u8 proto)
{
@@ -163,7 +163,7 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.generic.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
{
.l3proto = PF_UNSPEC,
.l4proto = 255,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index a2503005d80b..d049ea5a3770 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -48,7 +48,7 @@ enum grep_conntrack {
GRE_CT_MAX
};
-static unsigned int gre_timeouts[GRE_CT_MAX] = {
+static const unsigned int gre_timeouts[GRE_CT_MAX] = {
[GRE_CT_UNREPLIED] = 30*HZ,
[GRE_CT_REPLIED] = 180*HZ,
};
@@ -352,7 +352,7 @@ static int gre_init_net(struct net *net, u_int16_t proto)
}
/* protocol helper struct */
-static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
+static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
.l3proto = AF_INET,
.l4proto = IPPROTO_GRE,
.pkt_to_tuple = gre_pkt_to_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 80faf04ddf15..fb9a35d16069 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -52,7 +52,7 @@ static const char *const sctp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
+static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_CLOSED] = 10 SECS,
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
@@ -578,6 +578,11 @@ static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = {
[CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 },
};
+#define SCTP_NLATTR_SIZE ( \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + 4) + \
+ NLA_ALIGN(NLA_HDRLEN + 4))
+
static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
{
struct nlattr *attr = cda[CTA_PROTOINFO_SCTP];
@@ -608,12 +613,6 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
}
-
-static int sctp_nlattr_size(void)
-{
- return nla_total_size(0) /* CTA_PROTOINFO_SCTP */
- + nla_policy_len(sctp_nla_policy, CTA_PROTOINFO_SCTP_MAX + 1);
-}
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
@@ -778,7 +777,7 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.sctp.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
.pkt_to_tuple = sctp_pkt_to_tuple,
@@ -793,8 +792,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.can_early_drop = sctp_can_early_drop,
.me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_size = SCTP_NLATTR_SIZE,
.to_nlattr = sctp_to_nlattr,
- .nlattr_size = sctp_nlattr_size,
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
@@ -815,7 +814,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
-struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
.l3proto = PF_INET6,
.l4proto = IPPROTO_SCTP,
.pkt_to_tuple = sctp_pkt_to_tuple,
@@ -830,8 +829,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.can_early_drop = sctp_can_early_drop,
.me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_size = SCTP_NLATTR_SIZE,
.to_nlattr = sctp_to_nlattr,
- .nlattr_size = sctp_nlattr_size,
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b12fc07111d0..e97cdc1cf98c 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -68,7 +68,7 @@ static const char *const tcp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
+static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
[TCP_CONNTRACK_SYN_SENT] = 2 MINS,
[TCP_CONNTRACK_SYN_RECV] = 60 SECS,
[TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
@@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ return;
+
seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
}
#endif
@@ -1039,6 +1042,9 @@ static int tcp_packet(struct nf_conn *ct,
IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
timeout = timeouts[TCP_CONNTRACK_UNACK];
+ else if (ct->proto.tcp.last_win == 0 &&
+ timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
+ timeout = timeouts[TCP_CONNTRACK_RETRANS];
else
timeout = timeouts[new_state];
spin_unlock_bh(&ct->lock);
@@ -1219,6 +1225,12 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
[CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
};
+#define TCP_NLATTR_SIZE ( \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags))) + \
+ NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags))))
+
static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
{
struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
@@ -1271,12 +1283,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
}
-static int tcp_nlattr_size(void)
-{
- return nla_total_size(0) /* CTA_PROTOINFO_TCP */
- + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
-}
-
static unsigned int tcp_nlattr_tuple_size(void)
{
static unsigned int size __read_mostly;
@@ -1538,7 +1544,7 @@ static struct nf_proto_net *tcp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.tcp.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_TCP,
@@ -1554,11 +1560,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
.can_early_drop = tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = tcp_to_nlattr,
- .nlattr_size = tcp_nlattr_size,
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
.nlattr_tuple_size = tcp_nlattr_tuple_size,
+ .nlattr_size = TCP_NLATTR_SIZE,
.nla_policy = nf_ct_port_nla_policy,
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
@@ -1576,7 +1582,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
-struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_TCP,
@@ -1591,8 +1597,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
.error = tcp_error,
.can_early_drop = tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_size = TCP_NLATTR_SIZE,
.to_nlattr = tcp_to_nlattr,
- .nlattr_size = tcp_nlattr_size,
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3a5f727103af..fe7243970aa4 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -26,7 +26,7 @@
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
-static unsigned int udp_timeouts[UDP_CT_MAX] = {
+static const unsigned int udp_timeouts[UDP_CT_MAX] = {
[UDP_CT_UNREPLIED] = 30*HZ,
[UDP_CT_REPLIED] = 180*HZ,
};
@@ -296,7 +296,7 @@ static struct nf_proto_net *udp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.udp.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDP,
@@ -328,7 +328,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4);
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
@@ -360,7 +360,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4);
#endif
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
@@ -392,7 +392,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6);
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5a101caa3e12..9123fdec5e14 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file *s, void *v)
WARN_ON(!l4proto);
ret = -ENOSPC;
- seq_printf(s, "%-8s %u %-8s %u %ld ",
+ seq_printf(s, "%-8s %u %-8s %u ",
l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
- l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
- nf_ct_expires(ct) / HZ);
+ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
+
+ if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ);
if (l4proto->print_conntrack)
l4proto->print_conntrack(s, ct);
@@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
goto release;
- if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ seq_puts(s, "[OFFLOAD] ");
+ else if (test_bit(IPS_ASSURED_BIT, &ct->status))
seq_puts(s, "[ASSURED] ");
if (seq_has_overflowed(s))
@@ -378,7 +382,6 @@ static int ct_open(struct inode *inode, struct file *file)
}
static const struct file_operations ct_file_ops = {
- .owner = THIS_MODULE,
.open = ct_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -471,7 +474,6 @@ static int ct_cpu_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations ct_cpu_seq_fops = {
- .owner = THIS_MODULE,
.open = ct_cpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table.c
new file mode 100644
index 000000000000..2f5099cb85b8
--- /dev/null
+++ b/net/netfilter/nf_flow_table.c
@@ -0,0 +1,429 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+struct flow_offload_entry {
+ struct flow_offload flow;
+ struct nf_conn *ct;
+ struct rcu_head rcu_head;
+};
+
+struct flow_offload *
+flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
+{
+ struct flow_offload_entry *entry;
+ struct flow_offload *flow;
+
+ if (unlikely(nf_ct_is_dying(ct) ||
+ !atomic_inc_not_zero(&ct->ct_general.use)))
+ return NULL;
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ goto err_ct_refcnt;
+
+ flow = &entry->flow;
+
+ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
+ goto err_dst_cache_original;
+
+ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
+ goto err_dst_cache_reply;
+
+ entry->ct = ct;
+
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
+ case NFPROTO_IPV4:
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
+ break;
+ case NFPROTO_IPV6:
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
+ break;
+ }
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
+ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
+ route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
+ FLOW_OFFLOAD_DIR_ORIGINAL;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
+ FLOW_OFFLOAD_DIR_REPLY;
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
+ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
+ route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
+ route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
+ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
+
+ if (ct->status & IPS_SRC_NAT)
+ flow->flags |= FLOW_OFFLOAD_SNAT;
+ else if (ct->status & IPS_DST_NAT)
+ flow->flags |= FLOW_OFFLOAD_DNAT;
+
+ return flow;
+
+err_dst_cache_reply:
+ dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+err_dst_cache_original:
+ kfree(entry);
+err_ct_refcnt:
+ nf_ct_put(ct);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(flow_offload_alloc);
+
+void flow_offload_free(struct flow_offload *flow)
+{
+ struct flow_offload_entry *e;
+
+ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
+ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+ e = container_of(flow, struct flow_offload_entry, flow);
+ kfree(e);
+}
+EXPORT_SYMBOL_GPL(flow_offload_free);
+
+void flow_offload_dead(struct flow_offload *flow)
+{
+ flow->flags |= FLOW_OFFLOAD_DYING;
+}
+EXPORT_SYMBOL_GPL(flow_offload_dead);
+
+int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+{
+ flow->timeout = (u32)jiffies;
+
+ rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+ *flow_table->type->params);
+ rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ *flow_table->type->params);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(flow_offload_add);
+
+void flow_offload_del(struct nf_flowtable *flow_table,
+ struct flow_offload *flow)
+{
+ struct flow_offload_entry *e;
+
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+ *flow_table->type->params);
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ *flow_table->type->params);
+
+ e = container_of(flow, struct flow_offload_entry, flow);
+ kfree_rcu(e, rcu_head);
+}
+EXPORT_SYMBOL_GPL(flow_offload_del);
+
+struct flow_offload_tuple_rhash *
+flow_offload_lookup(struct nf_flowtable *flow_table,
+ struct flow_offload_tuple *tuple)
+{
+ return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+ *flow_table->type->params);
+}
+EXPORT_SYMBOL_GPL(flow_offload_lookup);
+
+static void nf_flow_release_ct(const struct flow_offload *flow)
+{
+ struct flow_offload_entry *e;
+
+ e = container_of(flow, struct flow_offload_entry, flow);
+ nf_ct_delete(e->ct, 0, 0);
+ nf_ct_put(e->ct);
+}
+
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ void (*iter)(struct flow_offload *flow, void *data),
+ void *data)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct rhashtable_iter hti;
+ struct flow_offload *flow;
+ int err;
+
+ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+ if (err)
+ return err;
+
+ rhashtable_walk_start(&hti);
+
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(tuplehash)) {
+ err = PTR_ERR(tuplehash);
+ if (err != -EAGAIN)
+ goto out;
+
+ continue;
+ }
+ if (tuplehash->tuple.dir)
+ continue;
+
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+ iter(flow, data);
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
+
+static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+{
+ return (__s32)(flow->timeout - (u32)jiffies) <= 0;
+}
+
+static inline bool nf_flow_is_dying(const struct flow_offload *flow)
+{
+ return flow->flags & FLOW_OFFLOAD_DYING;
+}
+
+void nf_flow_offload_work_gc(struct work_struct *work)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table;
+ struct rhashtable_iter hti;
+ struct flow_offload *flow;
+ int err;
+
+ flow_table = container_of(work, struct nf_flowtable, gc_work.work);
+
+ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+ if (err)
+ goto schedule;
+
+ rhashtable_walk_start(&hti);
+
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(tuplehash)) {
+ err = PTR_ERR(tuplehash);
+ if (err != -EAGAIN)
+ goto out;
+
+ continue;
+ }
+ if (tuplehash->tuple.dir)
+ continue;
+
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+ if (nf_flow_has_expired(flow) ||
+ nf_flow_is_dying(flow)) {
+ flow_offload_del(flow_table, flow);
+ nf_flow_release_ct(flow);
+ }
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+schedule:
+ queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
+
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+{
+ const struct flow_offload_tuple *tuple = data;
+
+ return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct flow_offload_tuple_rhash *tuplehash = data;
+
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct flow_offload_tuple *tuple = arg->key;
+ const struct flow_offload_tuple_rhash *x = ptr;
+
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+ return 1;
+
+ return 0;
+}
+
+const struct rhashtable_params nf_flow_offload_rhash_params = {
+ .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
+ .hashfn = flow_offload_hash,
+ .obj_hashfn = flow_offload_hash_obj,
+ .obj_cmpfn = flow_offload_hash_cmp,
+ .automatic_shrinking = true,
+};
+EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
+
+static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace2(&udph->check, skb, port,
+ new_port, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, __be16 port, __be16 new_port)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+int nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir)
+{
+ struct flow_ports *hdr;
+ __be16 port, new_port;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+ return -1;
+
+ hdr = (void *)(skb_network_header(skb) + thoff);
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = hdr->source;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
+ hdr->source = new_port;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = hdr->dest;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+ hdr->dest = new_port;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+}
+EXPORT_SYMBOL_GPL(nf_flow_snat_port);
+
+int nf_flow_dnat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir)
+{
+ struct flow_ports *hdr;
+ __be16 port, new_port;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+ return -1;
+
+ hdr = (void *)(skb_network_header(skb) + thoff);
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = hdr->dest;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
+ hdr->dest = new_port;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = hdr->source;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
+ hdr->source = new_port;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+}
+EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
new file mode 100644
index 000000000000..281209aeba8f
--- /dev/null
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -0,0 +1,48 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+
+static unsigned int
+nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return nf_flow_offload_ip_hook(priv, skb, state);
+ case htons(ETH_P_IPV6):
+ return nf_flow_offload_ipv6_hook(priv, skb, state);
+ }
+
+ return NF_ACCEPT;
+}
+
+static struct nf_flowtable_type flowtable_inet = {
+ .family = NFPROTO_INET,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_inet_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_inet_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_inet);
+
+ return 0;
+}
+
+static void __exit nf_flow_inet_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_inet);
+}
+
+module_init(nf_flow_inet_module_init);
+module_exit(nf_flow_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 44284cd2528d..18f6d7ae995b 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -10,7 +10,7 @@
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
const struct nf_hook_entries *entries, unsigned int index,
unsigned int verdict);
-unsigned int nf_queue_nf_hook_drop(struct net *net);
+void nf_queue_nf_hook_drop(struct net *net);
/* nf_log.c */
int __init netfilter_log_init(void);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8bb152a7cca4..c2c1b16b7538 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -402,7 +402,6 @@ static int nflog_open(struct inode *inode, struct file *file)
}
static const struct file_operations nflog_file_ops = {
- .owner = THIS_MODULE,
.open = nflog_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index f7e21953b1de..d67a96a25a68 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -10,6 +10,8 @@
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
@@ -96,30 +98,56 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
-unsigned int nf_queue_nf_hook_drop(struct net *net)
+void nf_queue_nf_hook_drop(struct net *net)
{
const struct nf_queue_handler *qh;
- unsigned int count = 0;
rcu_read_lock();
qh = rcu_dereference(net->nf.queue_handler);
if (qh)
- count = qh->nf_hook_drop(net);
+ qh->nf_hook_drop(net);
rcu_read_unlock();
-
- return count;
}
EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
+static void nf_ip_saveroute(const struct sk_buff *skb,
+ struct nf_queue_entry *entry)
+{
+ struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ rt_info->tos = iph->tos;
+ rt_info->daddr = iph->daddr;
+ rt_info->saddr = iph->saddr;
+ rt_info->mark = skb->mark;
+ }
+}
+
+static void nf_ip6_saveroute(const struct sk_buff *skb,
+ struct nf_queue_entry *entry)
+{
+ struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ rt_info->daddr = iph->daddr;
+ rt_info->saddr = iph->saddr;
+ rt_info->mark = skb->mark;
+ }
+}
+
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
const struct nf_hook_entries *entries,
unsigned int index, unsigned int queuenum)
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
- const struct nf_afinfo *afinfo;
const struct nf_queue_handler *qh;
struct net *net = state->net;
+ unsigned int route_key_size;
/* QUEUE == DROP if no one is waiting, to be safe. */
qh = rcu_dereference(net->nf.queue_handler);
@@ -128,11 +156,19 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
goto err;
}
- afinfo = nf_get_afinfo(state->pf);
- if (!afinfo)
- goto err;
+ switch (state->pf) {
+ case AF_INET:
+ route_key_size = sizeof(struct ip_rt_info);
+ break;
+ case AF_INET6:
+ route_key_size = sizeof(struct ip6_rt_info);
+ break;
+ default:
+ route_key_size = 0;
+ break;
+ }
- entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
if (!entry) {
status = -ENOMEM;
goto err;
@@ -142,12 +178,21 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
.skb = skb,
.state = *state,
.hook_index = index,
- .size = sizeof(*entry) + afinfo->route_key_size,
+ .size = sizeof(*entry) + route_key_size,
};
nf_queue_entry_get_refs(entry);
skb_dst_force(skb);
- afinfo->saveroute(skb, entry);
+
+ switch (entry->state.pf) {
+ case AF_INET:
+ nf_ip_saveroute(skb, entry);
+ break;
+ case AF_INET6:
+ nf_ip6_saveroute(skb, entry);
+ break;
+ }
+
status = qh->outfn(entry, queuenum);
if (status < 0) {
@@ -204,13 +249,31 @@ repeat:
return NF_ACCEPT;
}
+static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
+{
+ switch (pf) {
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ case NFPROTO_BRIDGE:
+ return rcu_dereference(net->nf.hooks_bridge[hooknum]);
+#endif
+ case NFPROTO_IPV4:
+ return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
+ case NFPROTO_IPV6:
+ return rcu_dereference(net->nf.hooks_ipv6[hooknum]);
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ return NULL;
+}
+
/* Caller must hold rcu read-side lock */
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
const struct nf_hook_entry *hook_entry;
const struct nf_hook_entries *hooks;
struct sk_buff *skb = entry->skb;
- const struct nf_afinfo *afinfo;
const struct net *net;
unsigned int i;
int err;
@@ -219,12 +282,12 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
net = entry->state.net;
pf = entry->state.pf;
- hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
+ hooks = nf_hook_entries_head(net, pf, entry->state.hook);
nf_queue_entry_release_refs(entry);
i = entry->hook_index;
- if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) {
+ if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
kfree_skb(skb);
kfree(entry);
return;
@@ -237,8 +300,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
if (verdict == NF_ACCEPT) {
- afinfo = nf_get_afinfo(entry->state.pf);
- if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0)
+ if (nf_reroute(skb, entry) < 0)
verdict = NF_DROP;
}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 49bd8bb16b18..92139a087260 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -317,7 +317,6 @@ static int synproxy_cpu_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations synproxy_cpu_seq_fops = {
- .owner = THIS_MODULE,
.open = synproxy_cpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d8327b43e4dc..0791813a1e7d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -17,6 +17,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/net_namespace.h>
@@ -24,86 +25,20 @@
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
-
-/**
- * nft_register_afinfo - register nf_tables address family info
- *
- * @afi: address family info to register
- *
- * Register the address family for use with nf_tables. Returns zero on
- * success or a negative errno code otherwise.
- */
-int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
-{
- INIT_LIST_HEAD(&afi->tables);
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail_rcu(&afi->list, &net->nft.af_info);
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- return 0;
-}
-EXPORT_SYMBOL_GPL(nft_register_afinfo);
-
-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi);
-
-/**
- * nft_unregister_afinfo - unregister nf_tables address family info
- *
- * @afi: address family info to unregister
- *
- * Unregister the address family for use with nf_tables.
- */
-void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi)
-{
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
- __nft_release_afinfo(net, afi);
- list_del_rcu(&afi->list);
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-}
-EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
-
-static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family)
-{
- struct nft_af_info *afi;
-
- list_for_each_entry(afi, &net->nft.af_info, list) {
- if (afi->family == family)
- return afi;
- }
- return NULL;
-}
-
-static struct nft_af_info *
-nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
-{
- struct nft_af_info *afi;
-
- afi = nft_afinfo_lookup(net, family);
- if (afi != NULL)
- return afi;
-#ifdef CONFIG_MODULES
- if (autoload) {
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nft-afinfo-%u", family);
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
- afi = nft_afinfo_lookup(net, family);
- if (afi != NULL)
- return ERR_PTR(-EAGAIN);
- }
-#endif
- return ERR_PTR(-EAFNOSUPPORT);
-}
+static LIST_HEAD(nf_tables_flowtables);
+static u64 table_handle;
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
- struct nft_af_info *afi,
+ u8 family,
struct nft_table *table,
struct nft_chain *chain,
const struct nlattr * const *nla)
{
ctx->net = net;
- ctx->afi = afi;
+ ctx->family = family;
ctx->table = table;
ctx->chain = chain;
ctx->nla = nla;
@@ -139,29 +74,26 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
-static int nf_tables_register_hooks(struct net *net,
- const struct nft_table *table,
- struct nft_chain *chain,
- unsigned int hook_nops)
+static int nf_tables_register_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain)
{
if (table->flags & NFT_TABLE_F_DORMANT ||
!nft_is_base_chain(chain))
return 0;
- return nf_register_net_hooks(net, nft_base_chain(chain)->ops,
- hook_nops);
+ return nf_register_net_hook(net, &nft_base_chain(chain)->ops);
}
-static void nf_tables_unregister_hooks(struct net *net,
- const struct nft_table *table,
- struct nft_chain *chain,
- unsigned int hook_nops)
+static void nf_tables_unregister_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain)
{
if (table->flags & NFT_TABLE_F_DORMANT ||
!nft_is_base_chain(chain))
return;
- nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops);
+ nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@ -348,34 +280,99 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
return err;
}
+static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_flowtable *flowtable)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type,
+ sizeof(struct nft_trans_flowtable));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWFLOWTABLE)
+ nft_activate_next(ctx->net, flowtable);
+
+ nft_trans_flowtable(trans) = flowtable;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
+static int nft_delflowtable(struct nft_ctx *ctx,
+ struct nft_flowtable *flowtable)
+{
+ int err;
+
+ err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
+ if (err < 0)
+ return err;
+
+ nft_deactivate_next(ctx->net, flowtable);
+ ctx->table->use--;
+
+ return err;
+}
+
/*
* Tables
*/
-static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
+static struct nft_table *nft_table_lookup(const struct net *net,
const struct nlattr *nla,
- u8 genmask)
+ u8 family, u8 genmask)
{
struct nft_table *table;
- list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry(table, &net->nft.tables, list) {
if (!nla_strcmp(nla, table->name) &&
+ table->family == family &&
+ nft_active_genmask(table, genmask))
+ return table;
+ }
+ return NULL;
+}
+
+static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
+ const struct nlattr *nla,
+ u8 genmask)
+{
+ struct nft_table *table;
+
+ list_for_each_entry(table, &net->nft.tables, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
nft_active_genmask(table, genmask))
return table;
}
return NULL;
}
-static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
+static struct nft_table *nf_tables_table_lookup(const struct net *net,
const struct nlattr *nla,
- u8 genmask)
+ u8 family, u8 genmask)
+{
+ struct nft_table *table;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ table = nft_table_lookup(net, nla, family, genmask);
+ if (table != NULL)
+ return table;
+
+ return ERR_PTR(-ENOENT);
+}
+
+static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net,
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_table *table;
if (nla == NULL)
return ERR_PTR(-EINVAL);
- table = nft_table_lookup(afi, nla, genmask);
+ table = nft_table_lookup_byhandle(net, nla, genmask);
if (table != NULL)
return table;
@@ -390,7 +387,7 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table)
static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
static const struct nf_chain_type *
-__nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
+__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
{
int i;
@@ -403,22 +400,20 @@ __nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
}
static const struct nf_chain_type *
-nf_tables_chain_type_lookup(const struct nft_af_info *afi,
- const struct nlattr *nla,
- bool autoload)
+nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload)
{
const struct nf_chain_type *type;
- type = __nf_tables_chain_type_lookup(afi->family, nla);
+ type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
return type;
#ifdef CONFIG_MODULES
if (autoload) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nft-chain-%u-%.*s", afi->family,
+ request_module("nft-chain-%u-%.*s", family,
nla_len(nla), (const char *)nla_data(nla));
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- type = __nf_tables_chain_type_lookup(afi->family, nla);
+ type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
return ERR_PTR(-EAGAIN);
}
@@ -430,6 +425,7 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
[NFTA_TABLE_NAME] = { .type = NLA_STRING,
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
+ [NFTA_TABLE_HANDLE] = { .type = NLA_U64 },
};
static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
@@ -451,7 +447,9 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
- nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
+ nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
+ nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
+ NFTA_TABLE_PAD))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -476,7 +474,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
goto err;
err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->afi->family, ctx->table);
+ event, 0, ctx->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -493,7 +491,6 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- const struct nft_af_info *afi;
const struct nft_table *table;
unsigned int idx = 0, s_idx = cb->args[0];
struct net *net = sock_net(skb->sk);
@@ -502,30 +499,27 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
rcu_read_lock();
cb->seq = net->nft.base_seq;
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (family != NFPROTO_UNSPEC && family != afi->family)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
continue;
- list_for_each_entry_rcu(table, &afi->tables, list) {
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- if (!nft_is_active(net, table))
- continue;
- if (nf_tables_fill_table_info(skb, net,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFT_MSG_NEWTABLE,
- NLM_F_MULTI,
- afi->family, table) < 0)
- goto done;
-
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (!nft_is_active(net, table))
+ continue;
+ if (nf_tables_fill_table_info(skb, net,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWTABLE, NLM_F_MULTI,
+ table->family, table) < 0)
+ goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
- idx++;
- }
+ idx++;
}
done:
rcu_read_unlock();
@@ -540,7 +534,6 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
- const struct nft_af_info *afi;
const struct nft_table *table;
struct sk_buff *skb2;
int family = nfmsg->nfgen_family;
@@ -553,11 +546,8 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
return netlink_dump_start(nlsk, skb, nlh, &c);
}
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -578,10 +568,7 @@ err:
return err;
}
-static void _nf_tables_table_disable(struct net *net,
- const struct nft_af_info *afi,
- struct nft_table *table,
- u32 cnt)
+static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
{
struct nft_chain *chain;
u32 i = 0;
@@ -595,14 +582,11 @@ static void _nf_tables_table_disable(struct net *net,
if (cnt && i++ == cnt)
break;
- nf_unregister_net_hooks(net, nft_base_chain(chain)->ops,
- afi->nops);
+ nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
}
}
-static int nf_tables_table_enable(struct net *net,
- const struct nft_af_info *afi,
- struct nft_table *table)
+static int nf_tables_table_enable(struct net *net, struct nft_table *table)
{
struct nft_chain *chain;
int err, i = 0;
@@ -613,8 +597,7 @@ static int nf_tables_table_enable(struct net *net,
if (!nft_is_base_chain(chain))
continue;
- err = nf_register_net_hooks(net, nft_base_chain(chain)->ops,
- afi->nops);
+ err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
if (err < 0)
goto err;
@@ -623,15 +606,13 @@ static int nf_tables_table_enable(struct net *net,
return 0;
err:
if (i)
- _nf_tables_table_disable(net, afi, table, i);
+ nft_table_disable(net, table, i);
return err;
}
-static void nf_tables_table_disable(struct net *net,
- const struct nft_af_info *afi,
- struct nft_table *table)
+static void nf_tables_table_disable(struct net *net, struct nft_table *table)
{
- _nf_tables_table_disable(net, afi, table, 0);
+ nft_table_disable(net, table, 0);
}
static int nf_tables_updtable(struct nft_ctx *ctx)
@@ -660,7 +641,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
nft_trans_table_enable(trans) = false;
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
ctx->table->flags & NFT_TABLE_F_DORMANT) {
- ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table);
+ ret = nf_tables_table_enable(ctx->net, ctx->table);
if (ret >= 0) {
ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
nft_trans_table_enable(trans) = true;
@@ -685,19 +666,14 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
const struct nlattr *name;
- struct nft_af_info *afi;
struct nft_table *table;
int family = nfmsg->nfgen_family;
u32 flags = 0;
struct nft_ctx ctx;
int err;
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
name = nla[NFTA_TABLE_NAME];
- table = nf_tables_table_lookup(afi, name, genmask);
+ table = nf_tables_table_lookup(net, name, family, genmask);
if (IS_ERR(table)) {
if (PTR_ERR(table) != -ENOENT)
return PTR_ERR(table);
@@ -707,7 +683,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
return nf_tables_updtable(&ctx);
}
@@ -717,47 +693,45 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
return -EINVAL;
}
- err = -EAFNOSUPPORT;
- if (!try_module_get(afi->owner))
- goto err1;
-
err = -ENOMEM;
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (table == NULL)
- goto err2;
+ goto err_kzalloc;
table->name = nla_strdup(name, GFP_KERNEL);
if (table->name == NULL)
- goto err3;
+ goto err_strdup;
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
+ INIT_LIST_HEAD(&table->flowtables);
+ table->family = family;
table->flags = flags;
+ table->handle = ++table_handle;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
- goto err4;
+ goto err_trans;
- list_add_tail_rcu(&table->list, &afi->tables);
+ list_add_tail_rcu(&table->list, &net->nft.tables);
return 0;
-err4:
+err_trans:
kfree(table->name);
-err3:
+err_strdup:
kfree(table);
-err2:
- module_put(afi->owner);
-err1:
+err_kzalloc:
return err;
}
static int nft_flush_table(struct nft_ctx *ctx)
{
- int err;
+ struct nft_flowtable *flowtable, *nft;
struct nft_chain *chain, *nc;
struct nft_object *obj, *ne;
struct nft_set *set, *ns;
+ int err;
list_for_each_entry(chain, &ctx->table->chains, list) {
if (!nft_is_active_next(ctx->net, chain))
@@ -774,7 +748,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, set))
continue;
- if (set->flags & NFT_SET_ANONYMOUS &&
+ if (nft_set_is_anonymous(set) &&
!list_empty(&set->bindings))
continue;
@@ -783,6 +757,12 @@ static int nft_flush_table(struct nft_ctx *ctx)
goto out;
}
+ list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) {
+ err = nft_delflowtable(ctx, flowtable);
+ if (err < 0)
+ goto out;
+ }
+
list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) {
err = nft_delobj(ctx, obj);
if (err < 0)
@@ -807,30 +787,28 @@ out:
static int nft_flush(struct nft_ctx *ctx, int family)
{
- struct nft_af_info *afi;
struct nft_table *table, *nt;
const struct nlattr * const *nla = ctx->nla;
int err = 0;
- list_for_each_entry(afi, &ctx->net->nft.af_info, list) {
- if (family != AF_UNSPEC && afi->family != family)
+ list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) {
+ if (family != AF_UNSPEC && table->family != family)
continue;
- ctx->afi = afi;
- list_for_each_entry_safe(table, nt, &afi->tables, list) {
- if (!nft_is_active_next(ctx->net, table))
- continue;
+ ctx->family = table->family;
- if (nla[NFTA_TABLE_NAME] &&
- nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
- continue;
+ if (!nft_is_active_next(ctx->net, table))
+ continue;
- ctx->table = table;
+ if (nla[NFTA_TABLE_NAME] &&
+ nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
+ continue;
- err = nft_flush_table(ctx);
- if (err < 0)
- goto out;
- }
+ ctx->table = table;
+
+ err = nft_flush_table(ctx);
+ if (err < 0)
+ goto out;
}
out:
return err;
@@ -843,20 +821,23 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
- struct nft_af_info *afi;
struct nft_table *table;
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
- nft_ctx_init(&ctx, net, skb, nlh, NULL, NULL, NULL, nla);
- if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
+ nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
+ if (family == AF_UNSPEC ||
+ (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
return nft_flush(&ctx, family);
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
+ if (nla[NFTA_TABLE_HANDLE])
+ table = nf_tables_table_lookup_byhandle(net,
+ nla[NFTA_TABLE_HANDLE],
+ genmask);
+ else
+ table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME],
+ family, genmask);
- table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -864,7 +845,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
table->use > 0)
return -EBUSY;
- ctx.afi = afi;
+ ctx.family = family;
ctx.table = table;
return nft_flush_table(&ctx);
@@ -876,7 +857,6 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
kfree(ctx->table->name);
kfree(ctx->table);
- module_put(ctx->afi->owner);
}
int nft_register_chain_type(const struct nf_chain_type *ctype)
@@ -1026,7 +1006,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
if (nft_is_base_chain(chain)) {
const struct nft_base_chain *basechain = nft_base_chain(chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
struct nlattr *nest;
nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
@@ -1077,7 +1057,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
goto err;
err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->afi->family, ctx->table,
+ event, 0, ctx->family, ctx->table,
ctx->chain);
if (err < 0) {
kfree_skb(skb);
@@ -1095,7 +1075,6 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
unsigned int idx = 0, s_idx = cb->args[0];
@@ -1105,31 +1084,30 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
rcu_read_lock();
cb->seq = net->nft.base_seq;
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (family != NFPROTO_UNSPEC && family != afi->family)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
continue;
- list_for_each_entry_rcu(table, &afi->tables, list) {
- list_for_each_entry_rcu(chain, &table->chains, list) {
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- if (!nft_is_active(net, chain))
- continue;
- if (nf_tables_fill_chain_info(skb, net,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFT_MSG_NEWCHAIN,
- NLM_F_MULTI,
- afi->family, table, chain) < 0)
- goto done;
+ list_for_each_entry_rcu(chain, &table->chains, list) {
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (!nft_is_active(net, chain))
+ continue;
+ if (nf_tables_fill_chain_info(skb, net,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWCHAIN,
+ NLM_F_MULTI,
+ table->family, table,
+ chain) < 0)
+ goto done;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
- idx++;
- }
+ idx++;
}
}
done:
@@ -1145,7 +1123,6 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
- const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
struct sk_buff *skb2;
@@ -1159,11 +1136,8 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
return netlink_dump_start(nlsk, skb, nlh, &c);
}
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -1227,13 +1201,13 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
static void nft_chain_stats_replace(struct nft_base_chain *chain,
struct nft_stats __percpu *newstats)
{
+ struct nft_stats __percpu *oldstats;
+
if (newstats == NULL)
return;
if (chain->stats) {
- struct nft_stats __percpu *oldstats =
- nft_dereference(chain->stats);
-
+ oldstats = nfnl_dereference(chain->stats, NFNL_SUBSYS_NFTABLES);
rcu_assign_pointer(chain->stats, newstats);
synchronize_rcu();
free_percpu(oldstats);
@@ -1252,8 +1226,8 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
free_percpu(basechain->stats);
if (basechain->stats)
static_branch_dec(&nft_counters_enabled);
- if (basechain->ops[0].dev != NULL)
- dev_put(basechain->ops[0].dev);
+ if (basechain->ops.dev != NULL)
+ dev_put(basechain->ops.dev);
kfree(chain->name);
kfree(basechain);
} else {
@@ -1264,15 +1238,15 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
struct nft_chain_hook {
u32 num;
- u32 priority;
+ s32 priority;
const struct nf_chain_type *type;
struct net_device *dev;
};
static int nft_chain_parse_hook(struct net *net,
const struct nlattr * const nla[],
- struct nft_af_info *afi,
- struct nft_chain_hook *hook, bool create)
+ struct nft_chain_hook *hook, u8 family,
+ bool create)
{
struct nlattr *ha[NFTA_HOOK_MAX + 1];
const struct nf_chain_type *type;
@@ -1289,27 +1263,29 @@ static int nft_chain_parse_hook(struct net *net,
return -EINVAL;
hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
- if (hook->num >= afi->nhooks)
- return -EINVAL;
-
hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
- type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
+ type = chain_type[family][NFT_CHAIN_T_DEFAULT];
if (nla[NFTA_CHAIN_TYPE]) {
- type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE],
- create);
+ type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
+ family, create);
if (IS_ERR(type))
return PTR_ERR(type);
}
if (!(type->hook_mask & (1 << hook->num)))
return -EOPNOTSUPP;
+
+ if (type->type == NFT_CHAIN_T_NAT &&
+ hook->priority <= NF_IP_PRI_CONNTRACK)
+ return -EOPNOTSUPP;
+
if (!try_module_get(type->owner))
return -ENOENT;
hook->type = type;
hook->dev = NULL;
- if (afi->flags & NFT_AF_NEEDS_DEV) {
+ if (family == NFPROTO_NETDEV) {
char ifname[IFNAMSIZ];
if (!ha[NFTA_HOOK_DEV]) {
@@ -1344,12 +1320,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
- struct nft_af_info *afi = ctx->afi;
struct nft_base_chain *basechain;
struct nft_stats __percpu *stats;
struct net *net = ctx->net;
struct nft_chain *chain;
- unsigned int i;
int err;
if (table->use == UINT_MAX)
@@ -1358,9 +1332,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_chain_hook hook;
struct nf_hook_ops *ops;
- nf_hookfn *hookfn;
- err = nft_chain_parse_hook(net, nla, afi, &hook, create);
+ err = nft_chain_parse_hook(net, nla, &hook, family, create);
if (err < 0)
return err;
@@ -1384,23 +1357,19 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
static_branch_inc(&nft_counters_enabled);
}
- hookfn = hook.type->hooks[hook.num];
basechain->type = hook.type;
chain = &basechain->chain;
- for (i = 0; i < afi->nops; i++) {
- ops = &basechain->ops[i];
- ops->pf = family;
- ops->hooknum = hook.num;
- ops->priority = hook.priority;
- ops->priv = chain;
- ops->hook = afi->hooks[ops->hooknum];
- ops->dev = hook.dev;
- if (hookfn)
- ops->hook = hookfn;
- if (afi->hook_ops_init)
- afi->hook_ops_init(ops, i);
- }
+ ops = &basechain->ops;
+ ops->pf = family;
+ ops->hooknum = hook.num;
+ ops->priority = hook.priority;
+ ops->priv = chain;
+ ops->hook = hook.type->hooks[ops->hooknum];
+ ops->dev = hook.dev;
+
+ if (basechain->type->type == NFT_CHAIN_T_NAT)
+ ops->nat_hook = true;
chain->flags |= NFT_BASE_CHAIN;
basechain->policy = policy;
@@ -1418,7 +1387,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
goto err1;
}
- err = nf_tables_register_hooks(net, table, chain, afi->nops);
+ err = nf_tables_register_hook(net, table, chain);
if (err < 0)
goto err1;
@@ -1432,7 +1401,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
return 0;
err2:
- nf_tables_unregister_hooks(net, table, chain, afi->nops);
+ nf_tables_unregister_hook(net, table, chain);
err1:
nf_tables_chain_destroy(chain);
@@ -1445,20 +1414,19 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
struct nft_chain *chain = ctx->chain;
- struct nft_af_info *afi = ctx->afi;
struct nft_base_chain *basechain;
struct nft_stats *stats = NULL;
struct nft_chain_hook hook;
const struct nlattr *name;
struct nf_hook_ops *ops;
struct nft_trans *trans;
- int err, i;
+ int err;
if (nla[NFTA_CHAIN_HOOK]) {
if (!nft_is_base_chain(chain))
return -EBUSY;
- err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook,
+ err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
create);
if (err < 0)
return err;
@@ -1469,14 +1437,12 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
return -EBUSY;
}
- for (i = 0; i < afi->nops; i++) {
- ops = &basechain->ops[i];
- if (ops->hooknum != hook.num ||
- ops->priority != hook.priority ||
- ops->dev != hook.dev) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
- }
+ ops = &basechain->ops;
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority ||
+ ops->dev != hook.dev) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
}
nft_chain_release_hook(&hook);
}
@@ -1539,7 +1505,6 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
const struct nlattr * uninitialized_var(name);
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
u8 policy = NF_ACCEPT;
@@ -1549,11 +1514,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -1593,7 +1555,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
}
}
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
if (chain != NULL) {
if (nlh->nlmsg_flags & NLM_F_EXCL)
@@ -1614,24 +1576,26 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule;
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
+ u64 handle;
u32 use;
int err;
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+ if (nla[NFTA_CHAIN_HANDLE]) {
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+ chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+ } else {
+ chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+ }
if (IS_ERR(chain))
return PTR_ERR(chain);
@@ -1639,7 +1603,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
chain->use > 0)
return -EBUSY;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
use = chain->use;
list_for_each_entry(rule, &chain->rules, list) {
@@ -1804,7 +1768,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
if (err < 0)
return err;
- type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]);
+ type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
if (IS_ERR(type))
return PTR_ERR(type);
@@ -2027,7 +1991,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
goto err;
err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->afi->family, ctx->table,
+ event, 0, ctx->family, ctx->table,
ctx->chain, rule);
if (err < 0) {
kfree_skb(skb);
@@ -2051,7 +2015,6 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
const struct nft_rule_dump_ctx *ctx = cb->data;
- const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
const struct nft_rule *rule;
@@ -2062,39 +2025,37 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
rcu_read_lock();
cb->seq = net->nft.base_seq;
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (family != NFPROTO_UNSPEC && family != afi->family)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
continue;
- list_for_each_entry_rcu(table, &afi->tables, list) {
- if (ctx && ctx->table &&
- strcmp(ctx->table, table->name) != 0)
+ if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
+ continue;
+
+ list_for_each_entry_rcu(chain, &table->chains, list) {
+ if (ctx && ctx->chain &&
+ strcmp(ctx->chain, chain->name) != 0)
continue;
- list_for_each_entry_rcu(chain, &table->chains, list) {
- if (ctx && ctx->chain[0] &&
- strcmp(ctx->chain, chain->name) != 0)
- continue;
-
- list_for_each_entry_rcu(rule, &chain->rules, list) {
- if (!nft_is_active(net, rule))
- goto cont;
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFT_MSG_NEWRULE,
- NLM_F_MULTI | NLM_F_APPEND,
- afi->family, table, chain, rule) < 0)
- goto done;
-
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ list_for_each_entry_rcu(rule, &chain->rules, list) {
+ if (!nft_is_active(net, rule))
+ goto cont;
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWRULE,
+ NLM_F_MULTI | NLM_F_APPEND,
+ table->family,
+ table, chain, rule) < 0)
+ goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
- idx++;
- }
+ idx++;
}
}
}
@@ -2124,7 +2085,6 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
- const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
const struct nft_rule *rule;
@@ -2168,11 +2128,8 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
return netlink_dump_start(nlsk, skb, nlh, &c);
}
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -2229,7 +2186,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
- struct nft_af_info *afi;
+ int family = nfmsg->nfgen_family;
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule, *old_rule = NULL;
@@ -2245,11 +2202,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
- afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -2288,7 +2242,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
return PTR_ERR(old_rule);
}
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
n = 0;
size = 0;
@@ -2412,18 +2366,14 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain = NULL;
struct nft_rule *rule;
int family = nfmsg->nfgen_family, err = 0;
struct nft_ctx ctx;
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -2434,7 +2384,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
return PTR_ERR(chain);
}
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
if (chain) {
if (nla[NFTA_RULE_HANDLE]) {
@@ -2601,6 +2551,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
[NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 },
+ [NFTA_SET_HANDLE] = { .type = NLA_U64 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2614,26 +2565,17 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
u8 genmask)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- struct nft_af_info *afi = NULL;
+ int family = nfmsg->nfgen_family;
struct nft_table *table = NULL;
- if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
- afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
- }
-
if (nla[NFTA_SET_TABLE] != NULL) {
- if (afi == NULL)
- return -EAFNOSUPPORT;
-
- table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE],
- genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE],
+ family, genmask);
if (IS_ERR(table))
return PTR_ERR(table);
}
- nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
return 0;
}
@@ -2653,6 +2595,22 @@ static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
return ERR_PTR(-ENOENT);
}
+static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+{
+ struct nft_set *set;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ list_for_each_entry(set, &table->sets, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == set->handle &&
+ nft_active_genmask(set, genmask))
+ return set;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
const struct nlattr *nla,
u8 genmask)
@@ -2760,7 +2718,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = ctx->afi->family;
+ nfmsg->nfgen_family = ctx->family;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
@@ -2768,6 +2726,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
if (nla_put_string(skb, NFTA_SET_NAME, set->name))
goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle),
+ NFTA_SET_PAD))
+ goto nla_put_failure;
if (set->flags != 0)
if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
goto nla_put_failure;
@@ -2852,10 +2813,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nft_set *set;
unsigned int idx, s_idx = cb->args[0];
- struct nft_af_info *afi;
struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
struct net *net = sock_net(skb->sk);
- int cur_family = cb->args[3];
struct nft_ctx *ctx = cb->data, ctx_set;
if (cb->args[1])
@@ -2864,51 +2823,44 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
cb->seq = net->nft.base_seq;
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (ctx->afi && ctx->afi != afi)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (ctx->family != NFPROTO_UNSPEC &&
+ ctx->family != table->family)
+ continue;
+
+ if (ctx->table && ctx->table != table)
continue;
- if (cur_family) {
- if (afi->family != cur_family)
+ if (cur_table) {
+ if (cur_table != table)
continue;
- cur_family = 0;
+ cur_table = NULL;
}
- list_for_each_entry_rcu(table, &afi->tables, list) {
- if (ctx->table && ctx->table != table)
- continue;
+ idx = 0;
+ list_for_each_entry_rcu(set, &table->sets, list) {
+ if (idx < s_idx)
+ goto cont;
+ if (!nft_is_active(net, set))
+ goto cont;
- if (cur_table) {
- if (cur_table != table)
- continue;
+ ctx_set = *ctx;
+ ctx_set.table = table;
+ ctx_set.family = table->family;
- cur_table = NULL;
+ if (nf_tables_fill_set(skb, &ctx_set, set,
+ NFT_MSG_NEWSET,
+ NLM_F_MULTI) < 0) {
+ cb->args[0] = idx;
+ cb->args[2] = (unsigned long) table;
+ goto done;
}
- idx = 0;
- list_for_each_entry_rcu(set, &table->sets, list) {
- if (idx < s_idx)
- goto cont;
- if (!nft_is_active(net, set))
- goto cont;
-
- ctx_set = *ctx;
- ctx_set.table = table;
- ctx_set.afi = afi;
- if (nf_tables_fill_set(skb, &ctx_set, set,
- NFT_MSG_NEWSET,
- NLM_F_MULTI) < 0) {
- cb->args[0] = idx;
- cb->args[2] = (unsigned long) table;
- cb->args[3] = afi->family;
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
- idx++;
- }
- if (s_idx)
- s_idx = 0;
+ idx++;
}
+ if (s_idx)
+ s_idx = 0;
}
cb->args[1] = 1;
done:
@@ -3006,8 +2958,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
+ int family = nfmsg->nfgen_family;
const struct nft_set_ops *ops;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
@@ -3114,15 +3066,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
- afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set)) {
@@ -3188,6 +3137,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
set->udata = udata;
set->timeout = timeout;
set->gc_int = gc_int;
+ set->handle = nf_tables_alloc_handle(table);
err = ops->init(set, &desc, nla);
if (err < 0)
@@ -3245,7 +3195,10 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
if (err < 0)
return err;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
+ if (nla[NFTA_SET_HANDLE])
+ set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask);
+ else
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
@@ -3277,7 +3230,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *i;
struct nft_set_iter iter;
- if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+ if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;
if (binding->flags & NFT_SET_MAP) {
@@ -3312,7 +3265,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
{
list_del_rcu(&binding->list);
- if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
+ if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
nft_is_active(ctx->net, set))
nf_tables_set_destroy(ctx, set);
}
@@ -3380,19 +3333,15 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
u8 genmask)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- struct nft_af_info *afi;
+ int family = nfmsg->nfgen_family;
struct nft_table *table;
- afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE],
- genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE],
+ family, genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
return 0;
}
@@ -3497,7 +3446,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nft_set_dump_ctx *dump_ctx = cb->data;
struct net *net = sock_net(skb->sk);
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_set *set;
struct nft_set_dump_args args;
@@ -3509,21 +3457,19 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
int event;
rcu_read_lock();
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (afi != dump_ctx->ctx.afi)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
+ dump_ctx->ctx.family != table->family)
continue;
- list_for_each_entry_rcu(table, &afi->tables, list) {
- if (table != dump_ctx->ctx.table)
- continue;
+ if (table != dump_ctx->ctx.table)
+ continue;
- list_for_each_entry_rcu(set, &table->sets, list) {
- if (set == dump_ctx->set) {
- set_found = true;
- break;
- }
+ list_for_each_entry_rcu(set, &table->sets, list) {
+ if (set == dump_ctx->set) {
+ set_found = true;
+ break;
}
- break;
}
break;
}
@@ -3543,7 +3489,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
goto nla_put_failure;
nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = afi->family;
+ nfmsg->nfgen_family = table->family;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
@@ -3606,7 +3552,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
goto nla_put_failure;
nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = ctx->afi->family;
+ nfmsg->nfgen_family = ctx->family;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
@@ -3963,7 +3909,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
list_for_each_entry(binding, &set->bindings, list) {
struct nft_ctx bind_ctx = {
.net = ctx->net,
- .afi = ctx->afi,
+ .family = ctx->family,
.table = ctx->table,
.chain = (struct nft_chain *)binding->chain,
};
@@ -4382,6 +4328,21 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
}
EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
+struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla,
+ u32 objtype, u8 genmask)
+{
+ struct nft_object *obj;
+
+ list_for_each_entry(obj, &table->objects, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == obj->handle &&
+ objtype == obj->ops->type->type &&
+ nft_active_genmask(obj, genmask))
+ return obj;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
[NFTA_OBJ_TABLE] = { .type = NLA_STRING,
.len = NFT_TABLE_MAXNAMELEN - 1 },
@@ -4389,6 +4350,7 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
.len = NFT_OBJ_MAXNAMELEN - 1 },
[NFTA_OBJ_TYPE] = { .type = NLA_U32 },
[NFTA_OBJ_DATA] = { .type = NLA_NESTED },
+ [NFTA_OBJ_HANDLE] = { .type = NLA_U64},
};
static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
@@ -4494,7 +4456,6 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
const struct nft_object_type *type;
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_object *obj;
struct nft_ctx ctx;
@@ -4506,11 +4467,8 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
!nla[NFTA_OBJ_DATA])
return -EINVAL;
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -4528,7 +4486,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
return 0;
}
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
type = nft_obj_type_get(objtype);
if (IS_ERR(type))
@@ -4540,6 +4498,8 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
goto err1;
}
obj->table = table;
+ obj->handle = nf_tables_alloc_handle(table);
+
obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
if (!obj->name) {
err = -ENOMEM;
@@ -4586,7 +4546,9 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
- nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
+ nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) ||
+ nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle),
+ NFTA_OBJ_PAD))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -4605,7 +4567,6 @@ struct nft_obj_filter {
static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- const struct nft_af_info *afi;
const struct nft_table *table;
unsigned int idx = 0, s_idx = cb->args[0];
struct nft_obj_filter *filter = cb->data;
@@ -4620,38 +4581,37 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
cb->seq = net->nft.base_seq;
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (family != NFPROTO_UNSPEC && family != afi->family)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
continue;
- list_for_each_entry_rcu(table, &afi->tables, list) {
- list_for_each_entry_rcu(obj, &table->objects, list) {
- if (!nft_is_active(net, obj))
- goto cont;
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- if (filter && filter->table[0] &&
- strcmp(filter->table, table->name))
- goto cont;
- if (filter &&
- filter->type != NFT_OBJECT_UNSPEC &&
- obj->ops->type->type != filter->type)
- goto cont;
+ list_for_each_entry_rcu(obj, &table->objects, list) {
+ if (!nft_is_active(net, obj))
+ goto cont;
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (filter && filter->table[0] &&
+ strcmp(filter->table, table->name))
+ goto cont;
+ if (filter &&
+ filter->type != NFT_OBJECT_UNSPEC &&
+ obj->ops->type->type != filter->type)
+ goto cont;
- if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFT_MSG_NEWOBJ,
- NLM_F_MULTI | NLM_F_APPEND,
- afi->family, table, obj, reset) < 0)
- goto done;
+ if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWOBJ,
+ NLM_F_MULTI | NLM_F_APPEND,
+ table->family, table,
+ obj, reset) < 0)
+ goto done;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
- idx++;
- }
+ idx++;
}
}
done:
@@ -4665,8 +4625,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
struct nft_obj_filter *filter = cb->data;
- kfree(filter->table);
- kfree(filter);
+ if (filter) {
+ kfree(filter->table);
+ kfree(filter);
+ }
return 0;
}
@@ -4701,7 +4663,6 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
int family = nfmsg->nfgen_family;
- const struct nft_af_info *afi;
const struct nft_table *table;
struct nft_object *obj;
struct sk_buff *skb2;
@@ -4732,11 +4693,8 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
!nla[NFTA_OBJ_TYPE])
return -EINVAL;
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -4782,32 +4740,33 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_object *obj;
struct nft_ctx ctx;
u32 objtype;
if (!nla[NFTA_OBJ_TYPE] ||
- !nla[NFTA_OBJ_NAME])
+ (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE]))
return -EINVAL;
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+ if (nla[NFTA_OBJ_HANDLE])
+ obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE],
+ objtype, genmask);
+ else
+ obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME],
+ objtype, genmask);
if (IS_ERR(obj))
return PTR_ERR(obj);
if (obj->use > 0)
return -EBUSY;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
return nft_delobj(&ctx, obj);
}
@@ -4845,7 +4804,613 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx,
struct nft_object *obj, int event)
{
nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
- ctx->afi->family, ctx->report, GFP_KERNEL);
+ ctx->family, ctx->report, GFP_KERNEL);
+}
+
+/*
+ * Flow tables
+ */
+void nft_register_flowtable_type(struct nf_flowtable_type *type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_add_tail_rcu(&type->list, &nf_tables_flowtables);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_register_flowtable_type);
+
+void nft_unregister_flowtable_type(struct nf_flowtable_type *type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_del_rcu(&type->list);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_flowtable_type);
+
+static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
+ [NFTA_FLOWTABLE_TABLE] = { .type = NLA_STRING,
+ .len = NFT_NAME_MAXLEN - 1 },
+ [NFTA_FLOWTABLE_NAME] = { .type = NLA_STRING,
+ .len = NFT_NAME_MAXLEN - 1 },
+ [NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED },
+ [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 },
+};
+
+struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
+ const struct nlattr *nla,
+ u8 genmask)
+{
+ struct nft_flowtable *flowtable;
+
+ list_for_each_entry(flowtable, &table->flowtables, list) {
+ if (!nla_strcmp(nla, flowtable->name) &&
+ nft_active_genmask(flowtable, genmask))
+ return flowtable;
+ }
+ return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
+
+struct nft_flowtable *
+nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+{
+ struct nft_flowtable *flowtable;
+
+ list_for_each_entry(flowtable, &table->flowtables, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == flowtable->handle &&
+ nft_active_genmask(flowtable, genmask))
+ return flowtable;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+#define NFT_FLOWTABLE_DEVICE_MAX 8
+
+static int nf_tables_parse_devices(const struct nft_ctx *ctx,
+ const struct nlattr *attr,
+ struct net_device *dev_array[], int *len)
+{
+ const struct nlattr *tmp;
+ struct net_device *dev;
+ char ifname[IFNAMSIZ];
+ int rem, n = 0, err;
+
+ nla_for_each_nested(tmp, attr, rem) {
+ if (nla_type(tmp) != NFTA_DEVICE_NAME) {
+ err = -EINVAL;
+ goto err1;
+ }
+
+ nla_strlcpy(ifname, tmp, IFNAMSIZ);
+ dev = dev_get_by_name(ctx->net, ifname);
+ if (!dev) {
+ err = -ENOENT;
+ goto err1;
+ }
+
+ dev_array[n++] = dev;
+ if (n == NFT_FLOWTABLE_DEVICE_MAX) {
+ err = -EFBIG;
+ goto err1;
+ }
+ }
+ if (!len)
+ return -EINVAL;
+
+ err = 0;
+err1:
+ *len = n;
+ return err;
+}
+
+static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
+ [NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 },
+ [NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 },
+ [NFTA_FLOWTABLE_HOOK_DEVS] = { .type = NLA_NESTED },
+};
+
+static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
+ const struct nlattr *attr,
+ struct nft_flowtable *flowtable)
+{
+ struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
+ struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
+ struct nf_hook_ops *ops;
+ int hooknum, priority;
+ int err, n = 0, i;
+
+ err = nla_parse_nested(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
+ nft_flowtable_hook_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_FLOWTABLE_HOOK_NUM] ||
+ !tb[NFTA_FLOWTABLE_HOOK_PRIORITY] ||
+ !tb[NFTA_FLOWTABLE_HOOK_DEVS])
+ return -EINVAL;
+
+ hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
+ if (hooknum != NF_NETDEV_INGRESS)
+ return -EINVAL;
+
+ priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
+
+ err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
+ dev_array, &n);
+ if (err < 0)
+ goto err1;
+
+ ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
+ if (!ops) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ flowtable->hooknum = hooknum;
+ flowtable->priority = priority;
+ flowtable->ops = ops;
+ flowtable->ops_len = n;
+
+ for (i = 0; i < n; i++) {
+ flowtable->ops[i].pf = NFPROTO_NETDEV;
+ flowtable->ops[i].hooknum = hooknum;
+ flowtable->ops[i].priority = priority;
+ flowtable->ops[i].priv = &flowtable->data.rhashtable;
+ flowtable->ops[i].hook = flowtable->data.type->hook;
+ flowtable->ops[i].dev = dev_array[i];
+ }
+
+ err = 0;
+err1:
+ for (i = 0; i < n; i++)
+ dev_put(dev_array[i]);
+
+ return err;
+}
+
+static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
+{
+ const struct nf_flowtable_type *type;
+
+ list_for_each_entry(type, &nf_tables_flowtables, list) {
+ if (family == type->family)
+ return type;
+ }
+ return NULL;
+}
+
+static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
+{
+ const struct nf_flowtable_type *type;
+
+ type = __nft_flowtable_type_get(family);
+ if (type != NULL && try_module_get(type->owner))
+ return type;
+
+#ifdef CONFIG_MODULES
+ if (type == NULL) {
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ request_module("nf-flowtable-%u", family);
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ if (__nft_flowtable_type_get(family))
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ return ERR_PTR(-ENOENT);
+}
+
+void nft_flow_table_iterate(struct net *net,
+ void (*iter)(struct nf_flowtable *flowtable, void *data),
+ void *data)
+{
+ struct nft_flowtable *flowtable;
+ const struct nft_table *table;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ iter(&flowtable->data, data);
+ }
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
+
+static void nft_unregister_flowtable_net_hooks(struct net *net,
+ struct nft_flowtable *flowtable)
+{
+ int i;
+
+ for (i = 0; i < flowtable->ops_len; i++) {
+ if (!flowtable->ops[i].dev)
+ continue;
+
+ nf_unregister_net_hook(net, &flowtable->ops[i]);
+ }
+}
+
+static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nf_flowtable_type *type;
+ u8 genmask = nft_genmask_next(net);
+ int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ struct nft_table *table;
+ struct nft_ctx ctx;
+ int err, i, k;
+
+ if (!nla[NFTA_FLOWTABLE_TABLE] ||
+ !nla[NFTA_FLOWTABLE_NAME] ||
+ !nla[NFTA_FLOWTABLE_HOOK])
+ return -EINVAL;
+
+ table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
+ family, genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ genmask);
+ if (IS_ERR(flowtable)) {
+ err = PTR_ERR(flowtable);
+ if (err != -ENOENT)
+ return err;
+ } else {
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+
+ return 0;
+ }
+
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+
+ flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
+ if (!flowtable)
+ return -ENOMEM;
+
+ flowtable->table = table;
+ flowtable->handle = nf_tables_alloc_handle(table);
+
+ flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
+ if (!flowtable->name) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ type = nft_flowtable_type_get(family);
+ if (IS_ERR(type)) {
+ err = PTR_ERR(type);
+ goto err2;
+ }
+
+ flowtable->data.type = type;
+ err = rhashtable_init(&flowtable->data.rhashtable, type->params);
+ if (err < 0)
+ goto err3;
+
+ err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
+ flowtable);
+ if (err < 0)
+ goto err3;
+
+ for (i = 0; i < flowtable->ops_len; i++) {
+ err = nf_register_net_hook(net, &flowtable->ops[i]);
+ if (err < 0)
+ goto err4;
+ }
+
+ err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
+ if (err < 0)
+ goto err5;
+
+ INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc);
+ queue_delayed_work(system_power_efficient_wq,
+ &flowtable->data.gc_work, HZ);
+
+ list_add_tail_rcu(&flowtable->list, &table->flowtables);
+ table->use++;
+
+ return 0;
+err5:
+ i = flowtable->ops_len;
+err4:
+ for (k = i - 1; k >= 0; k--)
+ nf_unregister_net_hook(net, &flowtable->ops[i]);
+
+ kfree(flowtable->ops);
+err3:
+ module_put(type->owner);
+err2:
+ kfree(flowtable->name);
+err1:
+ kfree(flowtable);
+ return err;
+}
+
+static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
+ int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ struct nft_table *table;
+ struct nft_ctx ctx;
+
+ table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
+ family, genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ if (nla[NFTA_FLOWTABLE_HANDLE])
+ flowtable = nf_tables_flowtable_lookup_byhandle(table,
+ nla[NFTA_FLOWTABLE_HANDLE],
+ genmask);
+ else
+ flowtable = nf_tables_flowtable_lookup(table,
+ nla[NFTA_FLOWTABLE_NAME],
+ genmask);
+ if (IS_ERR(flowtable))
+ return PTR_ERR(flowtable);
+ if (flowtable->use > 0)
+ return -EBUSY;
+
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+
+ return nft_delflowtable(&ctx, flowtable);
+}
+
+static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event,
+ u32 flags, int family,
+ struct nft_flowtable *flowtable)
+{
+ struct nlattr *nest, *nest_devs;
+ struct nfgenmsg *nfmsg;
+ struct nlmsghdr *nlh;
+ int i;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
+
+ if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
+ nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
+ nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
+ nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
+ NFTA_FLOWTABLE_PAD))
+ goto nla_put_failure;
+
+ nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
+ if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
+ nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
+ goto nla_put_failure;
+
+ nest_devs = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK_DEVS);
+ if (!nest_devs)
+ goto nla_put_failure;
+
+ for (i = 0; i < flowtable->ops_len; i++) {
+ if (flowtable->ops[i].dev &&
+ nla_put_string(skb, NFTA_DEVICE_NAME,
+ flowtable->ops[i].dev->name))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest_devs);
+ nla_nest_end(skb, nest);
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+struct nft_flowtable_filter {
+ char *table;
+};
+
+static int nf_tables_dump_flowtable(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ struct nft_flowtable_filter *filter = cb->data;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ const struct nft_table *table;
+
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+ continue;
+
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ if (!nft_is_active(net, flowtable))
+ goto cont;
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (filter && filter->table[0] &&
+ strcmp(filter->table, table->name))
+ goto cont;
+
+ if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWFLOWTABLE,
+ NLM_F_MULTI | NLM_F_APPEND,
+ table->family, flowtable) < 0)
+ goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ }
+done:
+ rcu_read_unlock();
+
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
+{
+ struct nft_flowtable_filter *filter = cb->data;
+
+ if (!filter)
+ return 0;
+
+ kfree(filter->table);
+ kfree(filter);
+
+ return 0;
+}
+
+static struct nft_flowtable_filter *
+nft_flowtable_filter_alloc(const struct nlattr * const nla[])
+{
+ struct nft_flowtable_filter *filter;
+
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ if (!filter)
+ return ERR_PTR(-ENOMEM);
+
+ if (nla[NFTA_FLOWTABLE_TABLE]) {
+ filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
+ GFP_KERNEL);
+ if (!filter->table) {
+ kfree(filter);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+ return filter;
+}
+
+static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_cur(net);
+ int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ const struct nft_table *table;
+ struct sk_buff *skb2;
+ int err;
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_flowtable,
+ .done = nf_tables_dump_flowtable_done,
+ };
+
+ if (nla[NFTA_FLOWTABLE_TABLE]) {
+ struct nft_flowtable_filter *filter;
+
+ filter = nft_flowtable_filter_alloc(nla);
+ if (IS_ERR(filter))
+ return -ENOMEM;
+
+ c.data = filter;
+ }
+ return netlink_dump_start(nlsk, skb, nlh, &c);
+ }
+
+ if (!nla[NFTA_FLOWTABLE_NAME])
+ return -EINVAL;
+
+ table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
+ family, genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ genmask);
+ if (IS_ERR(flowtable))
+ return PTR_ERR(flowtable);
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq,
+ NFT_MSG_NEWFLOWTABLE, 0, family,
+ flowtable);
+ if (err < 0)
+ goto err;
+
+ return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+err:
+ kfree_skb(skb2);
+ return err;
+}
+
+static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
+ struct nft_flowtable *flowtable,
+ int event)
+{
+ struct sk_buff *skb;
+ int err;
+
+ if (ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+ return;
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
+ ctx->seq, event, 0,
+ ctx->family, flowtable);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
+ return;
+err:
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
+}
+
+static void nft_flowtable_destroy(void *ptr, void *arg)
+{
+ kfree(ptr);
+}
+
+static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
+{
+ cancel_delayed_work_sync(&flowtable->data.gc_work);
+ kfree(flowtable->name);
+ rhashtable_free_and_destroy(&flowtable->data.rhashtable,
+ nft_flowtable_destroy, NULL);
+ module_put(flowtable->data.type->owner);
}
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
@@ -4878,6 +5443,46 @@ nla_put_failure:
return -EMSGSIZE;
}
+static void nft_flowtable_event(unsigned long event, struct net_device *dev,
+ struct nft_flowtable *flowtable)
+{
+ int i;
+
+ for (i = 0; i < flowtable->ops_len; i++) {
+ if (flowtable->ops[i].dev != dev)
+ continue;
+
+ nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
+ flowtable->ops[i].dev = NULL;
+ break;
+ }
+}
+
+static int nf_tables_flowtable_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nft_flowtable *flowtable;
+ struct nft_table *table;
+
+ if (event != NETDEV_UNREGISTER)
+ return 0;
+
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_for_each_entry(table, &dev_net(dev)->nft.tables, list) {
+ list_for_each_entry(flowtable, &table->flowtables, list) {
+ nft_flowtable_event(event, dev, flowtable);
+ }
+ }
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nf_tables_flowtable_notifier = {
+ .notifier_call = nf_tables_flowtable_event,
+};
+
static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
int event)
{
@@ -5030,6 +5635,21 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
+ [NFT_MSG_NEWFLOWTABLE] = {
+ .call_batch = nf_tables_newflowtable,
+ .attr_count = NFTA_FLOWTABLE_MAX,
+ .policy = nft_flowtable_policy,
+ },
+ [NFT_MSG_GETFLOWTABLE] = {
+ .call = nf_tables_getflowtable,
+ .attr_count = NFTA_FLOWTABLE_MAX,
+ .policy = nft_flowtable_policy,
+ },
+ [NFT_MSG_DELFLOWTABLE] = {
+ .call_batch = nf_tables_delflowtable,
+ .attr_count = NFTA_FLOWTABLE_MAX,
+ .policy = nft_flowtable_policy,
+ },
};
static void nft_chain_commit_update(struct nft_trans *trans)
@@ -5075,6 +5695,9 @@ static void nf_tables_commit_release(struct nft_trans *trans)
case NFT_MSG_DELOBJ:
nft_obj_destroy(nft_trans_obj(trans));
break;
+ case NFT_MSG_DELFLOWTABLE:
+ nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
+ break;
}
kfree(trans);
}
@@ -5101,7 +5724,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
if (nft_trans_table_update(trans)) {
if (!nft_trans_table_enable(trans)) {
nf_tables_table_disable(net,
- trans->ctx.afi,
trans->ctx.table);
trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
}
@@ -5127,10 +5749,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DELCHAIN:
list_del_rcu(&trans->ctx.chain->list);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
- nf_tables_unregister_hooks(trans->ctx.net,
- trans->ctx.table,
- trans->ctx.chain,
- trans->ctx.afi->nops);
+ nf_tables_unregister_hook(trans->ctx.net,
+ trans->ctx.table,
+ trans->ctx.chain);
break;
case NFT_MSG_NEWRULE:
nft_clear(trans->ctx.net, nft_trans_rule(trans));
@@ -5150,7 +5771,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
/* This avoids hitting -EBUSY when deleting the table
* from the transaction.
*/
- if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS &&
+ if (nft_set_is_anonymous(nft_trans_set(trans)) &&
!list_empty(&nft_trans_set(trans)->bindings))
trans->ctx.table->use--;
@@ -5193,6 +5814,21 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
NFT_MSG_DELOBJ);
break;
+ case NFT_MSG_NEWFLOWTABLE:
+ nft_clear(net, nft_trans_flowtable(trans));
+ nf_tables_flowtable_notify(&trans->ctx,
+ nft_trans_flowtable(trans),
+ NFT_MSG_NEWFLOWTABLE);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELFLOWTABLE:
+ list_del_rcu(&nft_trans_flowtable(trans)->list);
+ nf_tables_flowtable_notify(&trans->ctx,
+ nft_trans_flowtable(trans),
+ NFT_MSG_DELFLOWTABLE);
+ nft_unregister_flowtable_net_hooks(net,
+ nft_trans_flowtable(trans));
+ break;
}
}
@@ -5230,6 +5866,9 @@ static void nf_tables_abort_release(struct nft_trans *trans)
case NFT_MSG_NEWOBJ:
nft_obj_destroy(nft_trans_obj(trans));
break;
+ case NFT_MSG_NEWFLOWTABLE:
+ nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
+ break;
}
kfree(trans);
}
@@ -5246,7 +5885,6 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
if (nft_trans_table_update(trans)) {
if (nft_trans_table_enable(trans)) {
nf_tables_table_disable(net,
- trans->ctx.afi,
trans->ctx.table);
trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
}
@@ -5267,10 +5905,9 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
} else {
trans->ctx.table->use--;
list_del_rcu(&trans->ctx.chain->list);
- nf_tables_unregister_hooks(trans->ctx.net,
- trans->ctx.table,
- trans->ctx.chain,
- trans->ctx.afi->nops);
+ nf_tables_unregister_hook(trans->ctx.net,
+ trans->ctx.table,
+ trans->ctx.chain);
}
break;
case NFT_MSG_DELCHAIN:
@@ -5320,6 +5957,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
nft_clear(trans->ctx.net, nft_trans_obj(trans));
nft_trans_destroy(trans);
break;
+ case NFT_MSG_NEWFLOWTABLE:
+ trans->ctx.table->use--;
+ list_del_rcu(&nft_trans_flowtable(trans)->list);
+ nft_unregister_flowtable_net_hooks(net,
+ nft_trans_flowtable(trans));
+ break;
+ case NFT_MSG_DELFLOWTABLE:
+ trans->ctx.table->use++;
+ nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
+ nft_trans_destroy(trans);
+ break;
}
}
@@ -5371,7 +6019,7 @@ int nft_chain_validate_hooks(const struct nft_chain *chain,
if (nft_is_base_chain(chain)) {
basechain = nft_base_chain(chain);
- if ((1 << basechain->ops[0].hooknum) & hook_flags)
+ if ((1 << basechain->ops.hooknum) & hook_flags)
return 0;
return -EOPNOTSUPP;
@@ -5839,22 +6487,13 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
}
EXPORT_SYMBOL_GPL(nft_data_dump);
-static int __net_init nf_tables_init_net(struct net *net)
-{
- INIT_LIST_HEAD(&net->nft.af_info);
- INIT_LIST_HEAD(&net->nft.commit_list);
- net->nft.base_seq = 1;
- return 0;
-}
-
int __nft_release_basechain(struct nft_ctx *ctx)
{
struct nft_rule *rule, *nr;
BUG_ON(!nft_is_base_chain(ctx->chain));
- nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain,
- ctx->afi->nops);
+ nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
list_del(&rule->list);
ctx->chain->use--;
@@ -5868,9 +6507,9 @@ int __nft_release_basechain(struct nft_ctx *ctx)
}
EXPORT_SYMBOL_GPL(__nft_release_basechain);
-/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */
-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
+static void __nft_release_tables(struct net *net)
{
+ struct nft_flowtable *flowtable, *nf;
struct nft_table *table, *nt;
struct nft_chain *chain, *nc;
struct nft_object *obj, *ne;
@@ -5878,13 +6517,16 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
struct nft_set *set, *ns;
struct nft_ctx ctx = {
.net = net,
- .afi = afi,
};
- list_for_each_entry_safe(table, nt, &afi->tables, list) {
+ list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
+ ctx.family = table->family;
+
list_for_each_entry(chain, &table->chains, list)
- nf_tables_unregister_hooks(net, table, chain,
- afi->nops);
+ nf_tables_unregister_hook(net, table, chain);
+ list_for_each_entry(flowtable, &table->flowtables, list)
+ nf_unregister_net_hooks(net, flowtable->ops,
+ flowtable->ops_len);
/* No packets are walking on these chains anymore. */
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
@@ -5895,6 +6537,11 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
nf_tables_rule_destroy(&ctx, rule);
}
}
+ list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
+ list_del(&flowtable->list);
+ table->use--;
+ nf_tables_flowtable_destroy(flowtable);
+ }
list_for_each_entry_safe(set, ns, &table->sets, list) {
list_del(&set->list);
table->use--;
@@ -5915,8 +6562,24 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
}
}
+static int __net_init nf_tables_init_net(struct net *net)
+{
+ INIT_LIST_HEAD(&net->nft.tables);
+ INIT_LIST_HEAD(&net->nft.commit_list);
+ net->nft.base_seq = 1;
+ return 0;
+}
+
+static void __net_exit nf_tables_exit_net(struct net *net)
+{
+ __nft_release_tables(net);
+ WARN_ON_ONCE(!list_empty(&net->nft.tables));
+ WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
+}
+
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
+ .exit = nf_tables_exit_net,
};
static int __init nf_tables_module_init(void)
@@ -5938,7 +6601,8 @@ static int __init nf_tables_module_init(void)
if (err < 0)
goto err3;
- pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
+ register_netdevice_notifier(&nf_tables_flowtable_notifier);
+
return register_pernet_subsys(&nf_tables_net_ops);
err3:
nf_tables_core_module_exit();
@@ -5952,6 +6616,7 @@ static void __exit nf_tables_module_exit(void)
{
unregister_pernet_subsys(&nf_tables_net_ops);
nfnetlink_subsys_unregister(&nf_tables_subsys);
+ unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
rcu_barrier();
nf_tables_core_module_exit();
kfree(info);
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
index f713cc205669..e30c7da09d0d 100644
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_tables.h>
@@ -16,56 +17,27 @@
#include <net/netfilter/nf_tables_ipv6.h>
#include <net/ip.h>
-static void nft_inet_hook_ops_init(struct nf_hook_ops *ops, unsigned int n)
+static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
- struct nft_af_info *afi;
-
- if (n == 1)
- afi = &nft_af_ipv4;
- else
- afi = &nft_af_ipv6;
-
- ops->pf = afi->family;
- if (afi->hooks[ops->hooknum])
- ops->hook = afi->hooks[ops->hooknum];
-}
-
-static struct nft_af_info nft_af_inet __read_mostly = {
- .family = NFPROTO_INET,
- .nhooks = NF_INET_NUMHOOKS,
- .owner = THIS_MODULE,
- .nops = 2,
- .hook_ops_init = nft_inet_hook_ops_init,
-};
-
-static int __net_init nf_tables_inet_init_net(struct net *net)
-{
- net->nft.inet = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.inet == NULL)
- return -ENOMEM;
- memcpy(net->nft.inet, &nft_af_inet, sizeof(nft_af_inet));
-
- if (nft_register_afinfo(net, net->nft.inet) < 0)
- goto err;
-
- return 0;
-
-err:
- kfree(net->nft.inet);
- return -ENOMEM;
-}
-
-static void __net_exit nf_tables_inet_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.inet);
- kfree(net->nft.inet);
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+
+ switch (state->pf) {
+ case NFPROTO_IPV4:
+ nft_set_pktinfo_ipv4(&pkt, skb);
+ break;
+ case NFPROTO_IPV6:
+ nft_set_pktinfo_ipv6(&pkt, skb);
+ break;
+ default:
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
}
-static struct pernet_operations nf_tables_inet_net_ops = {
- .init = nf_tables_inet_init_net,
- .exit = nf_tables_inet_exit_net,
-};
-
static const struct nf_chain_type filter_inet = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -76,26 +48,22 @@ static const struct nf_chain_type filter_inet = {
(1 << NF_INET_FORWARD) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_inet,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_inet,
+ [NF_INET_FORWARD] = nft_do_chain_inet,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
+ [NF_INET_POST_ROUTING] = nft_do_chain_inet,
+ },
};
static int __init nf_tables_inet_init(void)
{
- int ret;
-
- ret = nft_register_chain_type(&filter_inet);
- if (ret < 0)
- return ret;
-
- ret = register_pernet_subsys(&nf_tables_inet_net_ops);
- if (ret < 0)
- nft_unregister_chain_type(&filter_inet);
-
- return ret;
+ return nft_register_chain_type(&filter_inet);
}
static void __exit nf_tables_inet_exit(void)
{
- unregister_pernet_subsys(&nf_tables_inet_net_ops);
nft_unregister_chain_type(&filter_inet);
}
@@ -104,4 +72,4 @@ module_exit(nf_tables_inet_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(1);
+MODULE_ALIAS_NFT_CHAIN(1, "filter");
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 403432988313..4041fafca934 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -21,66 +21,32 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb,
{
struct nft_pktinfo pkt;
+ nft_set_pktinfo(&pkt, skb, state);
+
switch (skb->protocol) {
case htons(ETH_P_IP):
- nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
+ nft_set_pktinfo_ipv4_validate(&pkt, skb);
break;
case htons(ETH_P_IPV6):
- nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
+ nft_set_pktinfo_ipv6_validate(&pkt, skb);
break;
default:
- nft_set_pktinfo_unspec(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb);
break;
}
return nft_do_chain(&pkt, priv);
}
-static struct nft_af_info nft_af_netdev __read_mostly = {
- .family = NFPROTO_NETDEV,
- .nhooks = NF_NETDEV_NUMHOOKS,
- .owner = THIS_MODULE,
- .flags = NFT_AF_NEEDS_DEV,
- .nops = 1,
- .hooks = {
- [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
- },
-};
-
-static int nf_tables_netdev_init_net(struct net *net)
-{
- net->nft.netdev = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.netdev == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.netdev, &nft_af_netdev, sizeof(nft_af_netdev));
-
- if (nft_register_afinfo(net, net->nft.netdev) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.netdev);
- return -ENOMEM;
-}
-
-static void nf_tables_netdev_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.netdev);
- kfree(net->nft.netdev);
-}
-
-static struct pernet_operations nf_tables_netdev_net_ops = {
- .init = nf_tables_netdev_init_net,
- .exit = nf_tables_netdev_exit_net,
-};
-
static const struct nf_chain_type nft_filter_chain_netdev = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
.family = NFPROTO_NETDEV,
.owner = THIS_MODULE,
.hook_mask = (1 << NF_NETDEV_INGRESS),
+ .hooks = {
+ [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
+ },
};
static void nft_netdev_event(unsigned long event, struct net_device *dev,
@@ -96,7 +62,7 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
__nft_release_basechain(ctx);
break;
case NETDEV_CHANGENAME:
- if (dev->ifindex != basechain->ops[0].dev->ifindex)
+ if (dev->ifindex != basechain->ops.dev->ifindex)
return;
strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
@@ -108,7 +74,6 @@ static int nf_tables_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain, *nr;
struct nft_ctx ctx = {
@@ -120,20 +85,18 @@ static int nf_tables_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
- ctx.afi = afi;
- if (afi->family != NFPROTO_NETDEV)
+ list_for_each_entry(table, &ctx.net->nft.tables, list) {
+ if (table->family != NFPROTO_NETDEV)
continue;
- list_for_each_entry(table, &afi->tables, list) {
- ctx.table = table;
- list_for_each_entry_safe(chain, nr, &table->chains, list) {
- if (!nft_is_base_chain(chain))
- continue;
+ ctx.family = table->family;
+ ctx.table = table;
+ list_for_each_entry_safe(chain, nr, &table->chains, list) {
+ if (!nft_is_base_chain(chain))
+ continue;
- ctx.chain = chain;
- nft_netdev_event(event, dev, &ctx);
- }
+ ctx.chain = chain;
+ nft_netdev_event(event, dev, &ctx);
}
}
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
@@ -153,27 +116,21 @@ static int __init nf_tables_netdev_init(void)
if (ret)
return ret;
- ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
- if (ret)
- goto err1;
-
ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
if (ret)
- goto err2;
+ goto err_register_netdevice_notifier;
return 0;
-err2:
- unregister_pernet_subsys(&nf_tables_netdev_net_ops);
-err1:
+err_register_netdevice_notifier:
nft_unregister_chain_type(&nft_filter_chain_netdev);
+
return ret;
}
static void __exit nf_tables_netdev_exit(void)
{
unregister_netdevice_notifier(&nf_tables_netdev_notifier);
- unregister_pernet_subsys(&nf_tables_netdev_net_ops);
nft_unregister_chain_type(&nft_filter_chain_netdev);
}
@@ -182,4 +139,4 @@ module_exit(nf_tables_netdev_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
-MODULE_ALIAS_NFT_FAMILY(5); /* NFPROTO_NETDEV */
+MODULE_ALIAS_NFT_CHAIN(5, "filter"); /* NFPROTO_NETDEV */
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 733d3e4a30d8..03ead8a9e90c 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -37,8 +37,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
rcu_dereference_protected(table[(id)].subsys, \
lockdep_nfnl_is_held((id)))
-static char __initdata nfversion[] = "0.30";
-
static struct {
struct mutex mutex;
const struct nfnetlink_subsystem __rcu *subsys;
@@ -580,13 +578,11 @@ static int __init nfnetlink_init(void)
for (i=0; i<NFNL_SUBSYS_COUNT; i++)
mutex_init(&table[i].mutex);
- pr_info("Netfilter messages via NETLINK v%s.\n", nfversion);
return register_pernet_subsys(&nfnetlink_net_ops);
}
static void __exit nfnetlink_exit(void)
{
- pr_info("Removing netfilter NETLINK layer.\n");
unregister_pernet_subsys(&nfnetlink_net_ops);
}
module_init(nfnetlink_init);
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c45e6d4358ab..88d427f9f9e6 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -527,7 +527,6 @@ static int __init nfnl_acct_init(void)
goto err_out;
}
- pr_info("nfnl_acct: registering with nfnetlink.\n");
ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
if (ret < 0) {
pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
@@ -543,7 +542,6 @@ err_out:
static void __exit nfnl_acct_exit(void)
{
- pr_info("nfnl_acct: unregistering from nfnetlink.\n");
nfnetlink_subsys_unregister(&nfnl_acct_subsys);
unregister_pernet_subsys(&nfnl_acct_ops);
}
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 41628b393673..d33ce6d5ebce 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include <linux/errno.h>
+#include <linux/capability.h>
#include <net/netlink.h>
#include <net/sock.h>
@@ -407,6 +408,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
struct nfnl_cthelper *nlcth;
int ret = 0;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
return -EINVAL;
@@ -611,6 +615,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
struct nfnl_cthelper *nlcth;
bool tuple_set = false;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nfnl_cthelper_dump_table,
@@ -678,6 +685,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
struct nfnl_cthelper *nlcth, *n;
int j = 0, ret;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (tb[NFCTH_NAME])
helper_name = nla_data(tb[NFCTH_NAME]);
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 32b1c0b44e79..95b04702a655 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -615,8 +615,6 @@ err_out:
static void __exit cttimeout_exit(void)
{
- pr_info("cttimeout: unregistering from nfnetlink.\n");
-
nfnetlink_subsys_unregister(&cttimeout_subsys);
unregister_pernet_subsys(&cttimeout_ops);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e5afab86381c..7b46aa4c478d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1054,7 +1054,6 @@ static int nful_open(struct inode *inode, struct file *file)
}
static const struct file_operations nful_file_ops = {
- .owner = THIS_MODULE,
.open = nful_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1093,10 +1092,15 @@ static int __net_init nfnl_log_net_init(struct net *net)
static void __net_exit nfnl_log_net_exit(struct net *net)
{
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
+ unsigned int i;
+
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
#endif
nf_log_unset(net, &nfulnl_logger);
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
+ WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
}
static struct pernet_operations nfnl_log_net_ops = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index a16356cacec3..8bba23160a68 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -941,23 +941,18 @@ static struct notifier_block nfqnl_dev_notifier = {
.notifier_call = nfqnl_rcv_dev_event,
};
-static unsigned int nfqnl_nf_hook_drop(struct net *net)
+static void nfqnl_nf_hook_drop(struct net *net)
{
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
- unsigned int instances = 0;
int i;
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct nfqnl_instance *inst;
struct hlist_head *head = &q->instance_table[i];
- hlist_for_each_entry_rcu(inst, head, hlist) {
+ hlist_for_each_entry_rcu(inst, head, hlist)
nfqnl_flush(inst, NULL, 0);
- instances++;
- }
}
-
- return instances;
}
static int
@@ -1482,7 +1477,6 @@ static int nfqnl_open(struct inode *inode, struct file *file)
}
static const struct file_operations nfqnl_file_ops = {
- .owner = THIS_MODULE,
.open = nfqnl_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1512,10 +1506,15 @@ static int __net_init nfnl_queue_net_init(struct net *net)
static void __net_exit nfnl_queue_net_exit(struct net *net)
{
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ unsigned int i;
+
nf_unregister_queue_handler(net);
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
#endif
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
+ WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
}
static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list)
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index c2945eb3397c..fa90a8402845 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -44,6 +44,7 @@ static void nft_cmp_eval(const struct nft_expr *expr,
case NFT_CMP_LT:
if (d == 0)
goto mismatch;
+ /* fall through */
case NFT_CMP_LTE:
if (d > 0)
goto mismatch;
@@ -51,6 +52,7 @@ static void nft_cmp_eval(const struct nft_expr *expr,
case NFT_CMP_GT:
if (d == 0)
goto mismatch;
+ /* fall through */
case NFT_CMP_GTE:
if (d < 0)
goto mismatch;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index b89f4f65b2a0..8e23726b9081 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -144,7 +144,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
{
par->net = ctx->net;
par->table = ctx->table->name;
- switch (ctx->afi->family) {
+ switch (ctx->family) {
case AF_INET:
entry->e4.ip.proto = proto;
entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
@@ -169,13 +169,13 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
par->hook_mask = 1 << ops->hooknum;
} else {
par->hook_mask = 0;
}
- par->family = ctx->afi->family;
+ par->family = ctx->family;
par->nft_compat = true;
}
@@ -267,7 +267,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
par.net = ctx->net;
par.target = target;
par.targinfo = info;
- par.family = ctx->afi->family;
+ par.family = ctx->family;
if (par.target->destroy != NULL)
par.target->destroy(&par);
@@ -302,7 +302,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
hook_mask = 1 << ops->hooknum;
if (target->hooks && !(hook_mask & target->hooks))
@@ -358,7 +358,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
{
par->net = ctx->net;
par->table = ctx->table->name;
- switch (ctx->afi->family) {
+ switch (ctx->family) {
case AF_INET:
entry->e4.ip.proto = proto;
entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
@@ -383,13 +383,13 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
par->hook_mask = 1 << ops->hooknum;
} else {
par->hook_mask = 0;
}
- par->family = ctx->afi->family;
+ par->family = ctx->family;
par->nft_compat = true;
}
@@ -446,7 +446,7 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
par.net = ctx->net;
par.match = match;
par.matchinfo = info;
- par.family = ctx->afi->family;
+ par.family = ctx->family;
if (par.match->destroy != NULL)
par.match->destroy(&par);
@@ -481,7 +481,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
hook_mask = 1 << ops->hooknum;
if (match->hooks && !(hook_mask & match->hooks))
@@ -648,7 +648,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
mt_name = nla_data(tb[NFTA_MATCH_NAME]);
rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
- family = ctx->afi->family;
+ family = ctx->family;
/* Re-use the existing match if it's already loaded. */
list_for_each_entry(nft_match, &nft_match_list, head) {
@@ -733,7 +733,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
tg_name = nla_data(tb[NFTA_TARGET_NAME]);
rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV]));
- family = ctx->afi->family;
+ family = ctx->family;
/* Re-use the existing target if it's already loaded. */
list_for_each_entry(nft_target, &nft_target_list, head) {
@@ -812,8 +812,6 @@ static int __init nft_compat_module_init(void)
goto err_target;
}
- pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>\n");
-
return ret;
err_target:
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 2647b895f4b0..6ab274b14484 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -405,7 +405,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (tb[NFTA_CT_DIRECTION] == NULL)
return -EINVAL;
- switch (ctx->afi->family) {
+ switch (ctx->family) {
case NFPROTO_IPV4:
len = FIELD_SIZEOF(struct nf_conntrack_tuple,
src.u3.ip);
@@ -456,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- err = nf_ct_netns_get(ctx->net, ctx->afi->family);
+ err = nf_ct_netns_get(ctx->net, ctx->family);
if (err < 0)
return err;
@@ -550,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
if (err < 0)
goto err1;
- err = nf_ct_netns_get(ctx->net, ctx->afi->family);
+ err = nf_ct_netns_get(ctx->net, ctx->family);
if (err < 0)
goto err1;
@@ -564,7 +564,7 @@ err1:
static void nft_ct_get_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
- nf_ct_netns_put(ctx->net, ctx->afi->family);
+ nf_ct_netns_put(ctx->net, ctx->family);
}
static void nft_ct_set_destroy(const struct nft_ctx *ctx,
@@ -573,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
struct nft_ct *priv = nft_expr_priv(expr);
__nft_ct_set_destroy(ctx, priv);
- nf_ct_netns_put(ctx->net, ctx->afi->family);
+ nf_ct_netns_put(ctx->net, ctx->family);
}
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -734,7 +734,7 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
struct nft_ct_helper_obj *priv = nft_obj_data(obj);
struct nf_conntrack_helper *help4, *help6;
char name[NF_CT_HELPER_NAME_LEN];
- int family = ctx->afi->family;
+ int family = ctx->family;
if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
return -EINVAL;
@@ -753,14 +753,14 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
switch (family) {
case NFPROTO_IPV4:
- if (ctx->afi->family == NFPROTO_IPV6)
+ if (ctx->family == NFPROTO_IPV6)
return -EINVAL;
help4 = nf_conntrack_helper_try_module_get(name, family,
priv->l4proto);
break;
case NFPROTO_IPV6:
- if (ctx->afi->family == NFPROTO_IPV4)
+ if (ctx->family == NFPROTO_IPV4)
return -EINVAL;
help6 = nf_conntrack_helper_try_module_get(name, family,
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 66221ad891a9..fc83e29d6634 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -164,7 +164,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
}
priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
- err = nft_validate_register_load(priv->sreg_key, set->klen);;
+ err = nft_validate_register_load(priv->sreg_key, set->klen);
if (err < 0)
return err;
@@ -184,7 +184,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (tb[NFTA_DYNSET_EXPR] != NULL) {
if (!(set->flags & NFT_SET_EVAL))
return -EINVAL;
- if (!(set->flags & NFT_SET_ANONYMOUS))
+ if (!nft_set_is_anonymous(set))
return -EOPNOTSUPP;
priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]);
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a0a93d987a3b..47ec1046ad11 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -214,6 +214,8 @@ static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
[NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
[NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_OP] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_SREG] = { .type = NLA_U32 },
};
static int nft_exthdr_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
new file mode 100644
index 000000000000..4503b8dcf9c0
--- /dev/null
+++ b/net/netfilter/nft_flow_offload.c
@@ -0,0 +1,264 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/ip.h> /* for ipv4 options. */
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <net/netfilter/nf_flow_table.h>
+
+struct nft_flow_offload {
+ struct nft_flowtable *flowtable;
+};
+
+static int nft_flow_route(const struct nft_pktinfo *pkt,
+ const struct nf_conn *ct,
+ struct nf_flow_route *route,
+ enum ip_conntrack_dir dir)
+{
+ struct dst_entry *this_dst = skb_dst(pkt->skb);
+ struct dst_entry *other_dst = NULL;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
+ break;
+ }
+
+ nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
+ if (!other_dst)
+ return -ENOENT;
+
+ route->tuple[dir].dst = this_dst;
+ route->tuple[dir].ifindex = nft_in(pkt)->ifindex;
+ route->tuple[!dir].dst = other_dst;
+ route->tuple[!dir].ifindex = nft_out(pkt)->ifindex;
+
+ return 0;
+}
+
+static bool nft_flow_offload_skip(struct sk_buff *skb)
+{
+ struct ip_options *opt = &(IPCB(skb)->opt);
+
+ if (unlikely(opt->optlen))
+ return true;
+ if (skb_sec_path(skb))
+ return true;
+
+ return false;
+}
+
+static void nft_flow_offload_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+ struct nf_flowtable *flowtable = &priv->flowtable->data;
+ enum ip_conntrack_info ctinfo;
+ struct nf_flow_route route;
+ struct flow_offload *flow;
+ enum ip_conntrack_dir dir;
+ struct nf_conn *ct;
+ int ret;
+
+ if (nft_flow_offload_skip(pkt->skb))
+ goto out;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+ if (!ct)
+ goto out;
+
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
+ goto out;
+ }
+
+ if (test_bit(IPS_HELPER_BIT, &ct->status))
+ goto out;
+
+ if (ctinfo == IP_CT_NEW ||
+ ctinfo == IP_CT_RELATED)
+ goto out;
+
+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
+ goto out;
+
+ dir = CTINFO2DIR(ctinfo);
+ if (nft_flow_route(pkt, ct, &route, dir) < 0)
+ goto err_flow_route;
+
+ flow = flow_offload_alloc(ct, &route);
+ if (!flow)
+ goto err_flow_alloc;
+
+ ret = flow_offload_add(flowtable, flow);
+ if (ret < 0)
+ goto err_flow_add;
+
+ return;
+
+err_flow_add:
+ flow_offload_free(flow);
+err_flow_alloc:
+ dst_release(route.tuple[!dir].dst);
+err_flow_route:
+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
+out:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_flow_offload_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ unsigned int hook_mask = (1 << NF_INET_FORWARD);
+
+ return nft_chain_validate_hooks(ctx->chain, hook_mask);
+}
+
+static int nft_flow_offload_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_flowtable *flowtable;
+
+ if (!tb[NFTA_FLOW_TABLE_NAME])
+ return -EINVAL;
+
+ flowtable = nf_tables_flowtable_lookup(ctx->table,
+ tb[NFTA_FLOW_TABLE_NAME],
+ genmask);
+ if (IS_ERR(flowtable))
+ return PTR_ERR(flowtable);
+
+ priv->flowtable = flowtable;
+ flowtable->use++;
+
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+ priv->flowtable->use--;
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+ if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_flow_offload_type;
+static const struct nft_expr_ops nft_flow_offload_ops = {
+ .type = &nft_flow_offload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
+ .eval = nft_flow_offload_eval,
+ .init = nft_flow_offload_init,
+ .destroy = nft_flow_offload_destroy,
+ .validate = nft_flow_offload_validate,
+ .dump = nft_flow_offload_dump,
+};
+
+static struct nft_expr_type nft_flow_offload_type __read_mostly = {
+ .name = "flow_offload",
+ .ops = &nft_flow_offload_ops,
+ .maxattr = NFTA_FLOW_MAX,
+ .owner = THIS_MODULE,
+};
+
+static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data)
+{
+ struct net_device *dev = data;
+
+ if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+ return;
+
+ flow_offload_dead(flow);
+}
+
+static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable,
+ void *data)
+{
+ nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
+}
+
+static int flow_offload_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
+
+ nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block flow_offload_netdev_notifier = {
+ .notifier_call = flow_offload_netdev_event,
+};
+
+static int __init nft_flow_offload_module_init(void)
+{
+ int err;
+
+ register_netdevice_notifier(&flow_offload_netdev_notifier);
+
+ err = nft_register_expr(&nft_flow_offload_type);
+ if (err < 0)
+ goto register_expr;
+
+ return 0;
+
+register_expr:
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+ return err;
+}
+
+static void __exit nft_flow_offload_module_exit(void)
+{
+ struct net *net;
+
+ nft_unregister_expr(&nft_flow_offload_type);
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+ rtnl_lock();
+ for_each_net(net)
+ nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL);
+ rtnl_unlock();
+}
+
+module_init(nft_flow_offload_module_init);
+module_exit(nft_flow_offload_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("flow_offload");
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 6f6e64423643..a27be36dc0af 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -112,7 +112,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
break;
}
- err = nf_logger_find_get(ctx->afi->family, li->type);
+ err = nf_logger_find_get(ctx->family, li->type);
if (err < 0)
goto err1;
@@ -133,7 +133,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx,
if (priv->prefix != nft_log_null_prefix)
kfree(priv->prefix);
- nf_logger_put(ctx->afi->family, li->type);
+ nf_logger_put(ctx->family, li->type);
}
static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 6ac03d4266c9..9d8655bc1bea 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -73,7 +73,7 @@ int nft_masq_init(const struct nft_ctx *ctx,
}
}
- return nf_ct_netns_get(ctx->net, ctx->afi->family);
+ return nf_ct_netns_get(ctx->net, ctx->family);
}
EXPORT_SYMBOL_GPL(nft_masq_init);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 5a60eb23a7ed..8fb91940e2e7 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -210,6 +210,11 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*dest = prandom_u32_state(state);
break;
}
+#ifdef CONFIG_XFRM
+ case NFT_META_SECPATH:
+ nft_reg_store8(dest, !!skb->sp);
+ break;
+#endif
default:
WARN_ON(1);
goto err;
@@ -308,6 +313,11 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
prandom_init_once(&nft_prandom_state);
len = sizeof(u32);
break;
+#ifdef CONFIG_XFRM
+ case NFT_META_SECPATH:
+ len = sizeof(u8);
+ break;
+#endif
default:
return -EOPNOTSUPP;
}
@@ -318,6 +328,38 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
+static int nft_meta_get_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+#ifdef CONFIG_XFRM
+ const struct nft_meta *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ if (priv->key != NFT_META_SECPATH)
+ return 0;
+
+ switch (ctx->family) {
+ case NFPROTO_NETDEV:
+ hooks = 1 << NF_NETDEV_INGRESS;
+ break;
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+#else
+ return 0;
+#endif
+}
+
int nft_meta_set_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
@@ -328,7 +370,7 @@ int nft_meta_set_validate(const struct nft_ctx *ctx,
if (priv->key != NFT_META_PKTTYPE)
return 0;
- switch (ctx->afi->family) {
+ switch (ctx->family) {
case NFPROTO_BRIDGE:
hooks = 1 << NF_BR_PRE_ROUTING;
break;
@@ -434,6 +476,7 @@ static const struct nft_expr_ops nft_meta_get_ops = {
.eval = nft_meta_get_eval,
.init = nft_meta_get_init,
.dump = nft_meta_get_dump,
+ .validate = nft_meta_get_validate,
};
static const struct nft_expr_ops nft_meta_set_ops = {
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index ed548d06b6dd..1f36954c2ba9 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -142,7 +142,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
- if (family != ctx->afi->family)
+ if (family != ctx->family)
return -EOPNOTSUPP;
switch (family) {
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 1e66538bf0ff..c64cbe78dee7 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -75,7 +75,7 @@ int nft_redir_init(const struct nft_ctx *ctx,
return -EINVAL;
}
- return nf_ct_netns_get(ctx->net, ctx->afi->family);
+ return nf_ct_netns_get(ctx->net, ctx->family);
}
EXPORT_SYMBOL_GPL(nft_redir_init);
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index a6b7d05aeacf..11a2071b6dd4 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -27,7 +27,7 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb
{
u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);
const struct sk_buff *skb = pkt->skb;
- const struct nf_afinfo *ai;
+ struct dst_entry *dst = NULL;
struct flowi fl;
memset(&fl, 0, sizeof(fl));
@@ -43,15 +43,10 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb
break;
}
- ai = nf_get_afinfo(nft_pf(pkt));
- if (ai) {
- struct dst_entry *dst = NULL;
-
- ai->route(nft_net(pkt), &dst, &fl, false);
- if (dst) {
- mtu = min(mtu, dst_mtu(dst));
- dst_release(dst);
- }
+ nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt));
+ if (dst) {
+ mtu = min(mtu, dst_mtu(dst));
+ dst_release(dst);
}
if (mtu <= minlen || mtu > 0xffff)
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index f8166c1d5430..3f1624ee056f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -251,11 +251,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (err)
return;
- err = rhashtable_walk_start(&hti);
- if (err && err != -EAGAIN) {
- iter->err = err;
- goto out;
- }
+ rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
@@ -306,9 +302,7 @@ static void nft_rhash_gc(struct work_struct *work)
if (err)
goto schedule;
- err = rhashtable_walk_start(&hti);
- if (err && err != -EAGAIN)
- goto out;
+ rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
new file mode 100644
index 000000000000..0b660c568156
--- /dev/null
+++ b/net/netfilter/utils.c
@@ -0,0 +1,90 @@
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_queue.h>
+
+__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol,
+ unsigned short family)
+{
+ const struct nf_ipv6_ops *v6ops;
+ __sum16 csum = 0;
+
+ switch (family) {
+ case AF_INET:
+ csum = nf_ip_checksum(skb, hook, dataoff, protocol);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ csum = v6ops->checksum(skb, hook, dataoff, protocol);
+ break;
+ }
+
+ return csum;
+}
+EXPORT_SYMBOL_GPL(nf_checksum);
+
+__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol, unsigned short family)
+{
+ const struct nf_ipv6_ops *v6ops;
+ __sum16 csum = 0;
+
+ switch (family) {
+ case AF_INET:
+ csum = nf_ip_checksum_partial(skb, hook, dataoff, len,
+ protocol);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ csum = v6ops->checksum_partial(skb, hook, dataoff, len,
+ protocol);
+ break;
+ }
+
+ return csum;
+}
+EXPORT_SYMBOL_GPL(nf_checksum_partial);
+
+int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict, unsigned short family)
+{
+ const struct nf_ipv6_ops *v6ops;
+ int ret = 0;
+
+ switch (family) {
+ case AF_INET:
+ ret = nf_ip_route(net, dst, fl, strict);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ ret = v6ops->route(net, dst, fl, strict);
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_route);
+
+int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
+{
+ const struct nf_ipv6_ops *v6ops;
+ int ret = 0;
+
+ switch (entry->state.pf) {
+ case AF_INET:
+ ret = nf_ip_reroute(skb, entry);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ ret = v6ops->reroute(skb, entry);
+ break;
+ }
+ return ret;
+}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index a77dd514297c..8fa4d37141a7 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -39,7 +39,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
-#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
#define XT_PCPU_BLOCK_SIZE 4096
struct compat_delta {
@@ -210,6 +209,9 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
{
struct xt_match *match;
+ if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+ return ERR_PTR(-EINVAL);
+
match = xt_find_match(nfproto, name, revision);
if (IS_ERR(match)) {
request_module("%st_%s", xt_prefix[nfproto], name);
@@ -252,6 +254,9 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
{
struct xt_target *target;
+ if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
+ return ERR_PTR(-EINVAL);
+
target = xt_find_target(af, name, revision);
if (IS_ERR(target)) {
request_module("%st_%s", xt_prefix[af], name);
@@ -1000,7 +1005,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
return NULL;
/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
- if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
+ if ((size >> PAGE_SHIFT) + 2 > totalram_pages)
return NULL;
info = kvmalloc(sz, GFP_KERNEL);
@@ -1027,7 +1032,7 @@ void xt_free_table_info(struct xt_table_info *info)
}
EXPORT_SYMBOL(xt_free_table_info);
-/* Find table by name, grabs mutex & ref. Returns NULL on error. */
+/* Find table by name, grabs mutex & ref. Returns ERR_PTR on error. */
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
const char *name)
{
@@ -1043,17 +1048,17 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
/* Table doesn't exist in this netns, re-try init */
list_for_each_entry(t, &init_net.xt.tables[af], list) {
+ int err;
+
if (strcmp(t->name, name))
continue;
- if (!try_module_get(t->me)) {
- mutex_unlock(&xt[af].mutex);
- return NULL;
- }
-
+ if (!try_module_get(t->me))
+ goto out;
mutex_unlock(&xt[af].mutex);
- if (t->table_init(net) != 0) {
+ err = t->table_init(net);
+ if (err < 0) {
module_put(t->me);
- return NULL;
+ return ERR_PTR(err);
}
found = t;
@@ -1073,10 +1078,28 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
module_put(found->me);
out:
mutex_unlock(&xt[af].mutex);
- return NULL;
+ return ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL_GPL(xt_find_table_lock);
+struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af,
+ const char *name)
+{
+ struct xt_table *t = xt_find_table_lock(net, af, name);
+
+#ifdef CONFIG_MODULES
+ if (IS_ERR(t)) {
+ int err = request_module("%stable_%s", xt_prefix[af], name);
+ if (err < 0)
+ return ERR_PTR(err);
+ t = xt_find_table_lock(net, af, name);
+ }
+#endif
+
+ return t;
+}
+EXPORT_SYMBOL_GPL(xt_request_find_table_lock);
+
void xt_table_unlock(struct xt_table *table)
{
mutex_unlock(&xt[table->af].mutex);
@@ -1344,7 +1367,6 @@ static int xt_table_open(struct inode *inode, struct file *file)
}
static const struct file_operations xt_table_ops = {
- .owner = THIS_MODULE,
.open = xt_table_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1397,7 +1419,7 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
trav->curr = trav->curr->next;
if (trav->curr != trav->head)
break;
- /* fallthru, _stop will unlock */
+ /* fall through */
default:
return NULL;
}
@@ -1480,7 +1502,6 @@ static int xt_match_open(struct inode *inode, struct file *file)
}
static const struct file_operations xt_match_ops = {
- .owner = THIS_MODULE,
.open = xt_match_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1533,7 +1554,6 @@ static int xt_target_open(struct inode *inode, struct file *file)
}
static const struct file_operations xt_target_ops = {
- .owner = THIS_MODULE,
.open = xt_target_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1729,8 +1749,17 @@ static int __net_init xt_net_init(struct net *net)
return 0;
}
+static void __net_exit xt_net_exit(struct net *net)
+{
+ int i;
+
+ for (i = 0; i < NFPROTO_NUMPROTO; i++)
+ WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
+}
+
static struct pernet_operations xt_net_ops = {
.init = xt_net_init,
+ .exit = xt_net_exit,
};
static int __init xt_init(void)
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index ee3421ad108d..6c2482b709b1 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -252,6 +252,7 @@ static struct xt_target idletimer_tg __read_mostly = {
.family = NFPROTO_UNSPEC,
.target = idletimer_tg_target,
.targetsize = sizeof(struct idletimer_tg_info),
+ .usersize = offsetof(struct idletimer_tg_info, timer),
.checkentry = idletimer_tg_checkentry,
.destroy = idletimer_tg_destroy,
.me = THIS_MODULE,
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 0971634e5444..1dcad893df78 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -198,6 +198,7 @@ static struct xt_target led_tg_reg __read_mostly = {
.family = NFPROTO_UNSPEC,
.target = led_tg,
.targetsize = sizeof(struct xt_led_info),
+ .usersize = offsetof(struct xt_led_info, internal_data),
.checkentry = led_tg_check,
.destroy = led_tg_destroy,
.me = THIS_MODULE,
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 9dae4d665965..99bb8e410f22 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -48,7 +48,6 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net,
unsigned int family)
{
struct flowi fl;
- const struct nf_afinfo *ai;
struct rtable *rt = NULL;
u_int32_t mtu = ~0U;
@@ -62,10 +61,8 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net,
memset(fl6, 0, sizeof(*fl6));
fl6->daddr = ipv6_hdr(skb)->saddr;
}
- ai = nf_get_afinfo(family);
- if (ai != NULL)
- ai->route(net, (struct dst_entry **)&rt, &fl, false);
+ nf_route(net, (struct dst_entry **)&rt, &fl, false, family);
if (rt != NULL) {
mtu = dst_mtu(&rt->dst);
dst_release(&rt->dst);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 3b2be2ae6987..911a7c0da504 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -36,7 +36,7 @@ MODULE_ALIAS("ip6t_addrtype");
static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
const struct in6_addr *addr, u16 mask)
{
- const struct nf_afinfo *afinfo;
+ const struct nf_ipv6_ops *v6ops;
struct flowi6 flow;
struct rt6_info *rt;
u32 ret = 0;
@@ -47,17 +47,14 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (dev)
flow.flowi6_oif = dev->ifindex;
- afinfo = nf_get_afinfo(NFPROTO_IPV6);
- if (afinfo != NULL) {
- const struct nf_ipv6_ops *v6ops;
-
+ v6ops = nf_get_ipv6_ops();
+ if (v6ops) {
if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
- v6ops = nf_get_ipv6_ops();
- if (v6ops && v6ops->chk_addr(net, addr, dev, true))
+ if (v6ops->chk_addr(net, addr, dev, true))
ret = XT_ADDRTYPE_LOCAL;
}
- route_err = afinfo->route(net, (struct dst_entry **)&rt,
- flowi6_to_flowi(&flow), false);
+ route_err = v6ops->route(net, (struct dst_entry **)&rt,
+ flowi6_to_flowi(&flow), false);
} else {
route_err = 1;
}
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 041da0d9c06f..06b090d8e901 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -27,6 +27,9 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
{
struct sock_fprog_kern program;
+ if (len > XT_BPF_MAX_NUM_INSTR)
+ return -EINVAL;
+
program.len = len;
program.filter = insns;
@@ -52,18 +55,11 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
{
- mm_segment_t oldfs = get_fs();
- int retval, fd;
-
- set_fs(KERNEL_DS);
- fd = bpf_obj_get_user(path, 0);
- set_fs(oldfs);
- if (fd < 0)
- return fd;
-
- retval = __bpf_mt_check_fd(fd, ret);
- sys_close(fd);
- return retval;
+ if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
+ return -EINVAL;
+
+ *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+ return PTR_ERR_OR_ZERO(*ret);
}
static int bpf_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index a6214f235333..b1b17b9353e1 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -12,292 +12,30 @@
* GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au).
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/jhash.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/rbtree.h>
+
#include <linux/module.h>
-#include <linux/random.h>
#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_connlimit.h>
+
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>
-
-#define CONNLIMIT_SLOTS 256U
-
-#ifdef CONFIG_LOCKDEP
-#define CONNLIMIT_LOCK_SLOTS 8U
-#else
-#define CONNLIMIT_LOCK_SLOTS 256U
-#endif
-
-#define CONNLIMIT_GC_MAX_NODES 8
-
-/* we will save the tuples of all connections we care about */
-struct xt_connlimit_conn {
- struct hlist_node node;
- struct nf_conntrack_tuple tuple;
-};
-
-struct xt_connlimit_rb {
- struct rb_node node;
- struct hlist_head hhead; /* connections/hosts in same subnet */
- union nf_inet_addr addr; /* search key */
-};
-
-static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;
-
-struct xt_connlimit_data {
- struct rb_root climit_root[CONNLIMIT_SLOTS];
-};
-
-static u_int32_t connlimit_rnd __read_mostly;
-static struct kmem_cache *connlimit_rb_cachep __read_mostly;
-static struct kmem_cache *connlimit_conn_cachep __read_mostly;
-
-static inline unsigned int connlimit_iphash(__be32 addr)
-{
- return jhash_1word((__force __u32)addr,
- connlimit_rnd) % CONNLIMIT_SLOTS;
-}
-
-static inline unsigned int
-connlimit_iphash6(const union nf_inet_addr *addr)
-{
- return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6),
- connlimit_rnd) % CONNLIMIT_SLOTS;
-}
-
-static inline bool already_closed(const struct nf_conn *conn)
-{
- if (nf_ct_protonum(conn) == IPPROTO_TCP)
- return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
- conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
- else
- return 0;
-}
-
-static int
-same_source(const union nf_inet_addr *addr,
- const union nf_inet_addr *u3, u_int8_t family)
-{
- if (family == NFPROTO_IPV4)
- return ntohl(addr->ip) - ntohl(u3->ip);
-
- return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6));
-}
-
-static bool add_hlist(struct hlist_head *head,
- const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr)
-{
- struct xt_connlimit_conn *conn;
-
- conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
- if (conn == NULL)
- return false;
- conn->tuple = *tuple;
- hlist_add_head(&conn->node, head);
- return true;
-}
-
-static unsigned int check_hlist(struct net *net,
- struct hlist_head *head,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone,
- bool *addit)
-{
- const struct nf_conntrack_tuple_hash *found;
- struct xt_connlimit_conn *conn;
- struct hlist_node *n;
- struct nf_conn *found_ct;
- unsigned int length = 0;
-
- *addit = true;
-
- /* check the saved connections */
- hlist_for_each_entry_safe(conn, n, head, node) {
- found = nf_conntrack_find_get(net, zone, &conn->tuple);
- if (found == NULL) {
- hlist_del(&conn->node);
- kmem_cache_free(connlimit_conn_cachep, conn);
- continue;
- }
-
- found_ct = nf_ct_tuplehash_to_ctrack(found);
-
- if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
- /*
- * Just to be sure we have it only once in the list.
- * We should not see tuples twice unless someone hooks
- * this into a table without "-p tcp --syn".
- */
- *addit = false;
- } else if (already_closed(found_ct)) {
- /*
- * we do not care about connections which are
- * closed already -> ditch it
- */
- nf_ct_put(found_ct);
- hlist_del(&conn->node);
- kmem_cache_free(connlimit_conn_cachep, conn);
- continue;
- }
-
- nf_ct_put(found_ct);
- length++;
- }
-
- return length;
-}
-
-static void tree_nodes_free(struct rb_root *root,
- struct xt_connlimit_rb *gc_nodes[],
- unsigned int gc_count)
-{
- struct xt_connlimit_rb *rbconn;
-
- while (gc_count) {
- rbconn = gc_nodes[--gc_count];
- rb_erase(&rbconn->node, root);
- kmem_cache_free(connlimit_rb_cachep, rbconn);
- }
-}
-
-static unsigned int
-count_tree(struct net *net, struct rb_root *root,
- const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr,
- u8 family, const struct nf_conntrack_zone *zone)
-{
- struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
- struct rb_node **rbnode, *parent;
- struct xt_connlimit_rb *rbconn;
- struct xt_connlimit_conn *conn;
- unsigned int gc_count;
- bool no_gc = false;
-
- restart:
- gc_count = 0;
- parent = NULL;
- rbnode = &(root->rb_node);
- while (*rbnode) {
- int diff;
- bool addit;
-
- rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node);
-
- parent = *rbnode;
- diff = same_source(addr, &rbconn->addr, family);
- if (diff < 0) {
- rbnode = &((*rbnode)->rb_left);
- } else if (diff > 0) {
- rbnode = &((*rbnode)->rb_right);
- } else {
- /* same source network -> be counted! */
- unsigned int count;
- count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
-
- tree_nodes_free(root, gc_nodes, gc_count);
- if (!addit)
- return count;
-
- if (!add_hlist(&rbconn->hhead, tuple, addr))
- return 0; /* hotdrop */
-
- return count + 1;
- }
-
- if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
- continue;
-
- /* only used for GC on hhead, retval and 'addit' ignored */
- check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
- if (hlist_empty(&rbconn->hhead))
- gc_nodes[gc_count++] = rbconn;
- }
-
- if (gc_count) {
- no_gc = true;
- tree_nodes_free(root, gc_nodes, gc_count);
- /* tree_node_free before new allocation permits
- * allocator to re-use newly free'd object.
- *
- * This is a rare event; in most cases we will find
- * existing node to re-use. (or gc_count is 0).
- */
- goto restart;
- }
-
- /* no match, need to insert new node */
- rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC);
- if (rbconn == NULL)
- return 0;
-
- conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
- if (conn == NULL) {
- kmem_cache_free(connlimit_rb_cachep, rbconn);
- return 0;
- }
-
- conn->tuple = *tuple;
- rbconn->addr = *addr;
-
- INIT_HLIST_HEAD(&rbconn->hhead);
- hlist_add_head(&conn->node, &rbconn->hhead);
-
- rb_link_node(&rbconn->node, parent, rbnode);
- rb_insert_color(&rbconn->node, root);
- return 1;
-}
-
-static int count_them(struct net *net,
- struct xt_connlimit_data *data,
- const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr,
- u_int8_t family,
- const struct nf_conntrack_zone *zone)
-{
- struct rb_root *root;
- int count;
- u32 hash;
-
- if (family == NFPROTO_IPV6)
- hash = connlimit_iphash6(addr);
- else
- hash = connlimit_iphash(addr->ip);
- root = &data->climit_root[hash];
-
- spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
-
- count = count_tree(net, root, tuple, addr, family, zone);
-
- spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
-
- return count;
-}
+#include <net/netfilter/nf_conntrack_count.h>
static bool
connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
struct net *net = xt_net(par);
const struct xt_connlimit_info *info = par->matchinfo;
- union nf_inet_addr addr;
struct nf_conntrack_tuple tuple;
const struct nf_conntrack_tuple *tuple_ptr = &tuple;
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
unsigned int connections;
+ u32 key[5];
ct = nf_ct_get(skb, &ctinfo);
if (ct != NULL) {
@@ -310,6 +48,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (xt_family(par) == NFPROTO_IPV6) {
const struct ipv6hdr *iph = ipv6_hdr(skb);
+ union nf_inet_addr addr;
unsigned int i;
memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
@@ -317,22 +56,24 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i)
addr.ip6[i] &= info->mask.ip6[i];
+ memcpy(key, &addr, sizeof(addr.ip6));
+ key[4] = zone->id;
} else {
const struct iphdr *iph = ip_hdr(skb);
- addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
+ key[0] = (info->flags & XT_CONNLIMIT_DADDR) ?
iph->daddr : iph->saddr;
- addr.ip &= info->mask.ip;
+ key[0] &= info->mask.ip;
+ key[1] = zone->id;
}
- connections = count_them(net, info->data, tuple_ptr, &addr,
- xt_family(par), zone);
+ connections = nf_conncount_count(net, info->data, key,
+ xt_family(par), tuple_ptr, zone);
if (connections == 0)
/* kmalloc failed, drop it entirely */
goto hotdrop;
- return (connections > info->limit) ^
- !!(info->flags & XT_CONNLIMIT_INVERT);
+ return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT);
hotdrop:
par->hotdrop = true;
@@ -342,61 +83,27 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
static int connlimit_mt_check(const struct xt_mtchk_param *par)
{
struct xt_connlimit_info *info = par->matchinfo;
- unsigned int i;
- int ret;
+ unsigned int keylen;
- net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd));
-
- ret = nf_ct_netns_get(par->net, par->family);
- if (ret < 0) {
- pr_info("cannot load conntrack support for "
- "address family %u\n", par->family);
- return ret;
- }
+ keylen = sizeof(u32);
+ if (par->family == NFPROTO_IPV6)
+ keylen += sizeof(struct in6_addr);
+ else
+ keylen += sizeof(struct in_addr);
/* init private data */
- info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
- if (info->data == NULL) {
- nf_ct_netns_put(par->net, par->family);
- return -ENOMEM;
- }
-
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
- info->data->climit_root[i] = RB_ROOT;
+ info->data = nf_conncount_init(par->net, par->family, keylen);
+ if (IS_ERR(info->data))
+ return PTR_ERR(info->data);
return 0;
}
-static void destroy_tree(struct rb_root *r)
-{
- struct xt_connlimit_conn *conn;
- struct xt_connlimit_rb *rbconn;
- struct hlist_node *n;
- struct rb_node *node;
-
- while ((node = rb_first(r)) != NULL) {
- rbconn = rb_entry(node, struct xt_connlimit_rb, node);
-
- rb_erase(node, r);
-
- hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
- kmem_cache_free(connlimit_conn_cachep, conn);
-
- kmem_cache_free(connlimit_rb_cachep, rbconn);
- }
-}
-
static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_connlimit_info *info = par->matchinfo;
- unsigned int i;
-
- nf_ct_netns_put(par->net, par->family);
-
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
- destroy_tree(&info->data->climit_root[i]);
- kfree(info->data);
+ nf_conncount_destroy(par->net, par->family, info->data);
}
static struct xt_match connlimit_mt_reg __read_mostly = {
@@ -413,40 +120,12 @@ static struct xt_match connlimit_mt_reg __read_mostly = {
static int __init connlimit_mt_init(void)
{
- int ret, i;
-
- BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS);
- BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0);
-
- for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i)
- spin_lock_init(&xt_connlimit_locks[i]);
-
- connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn",
- sizeof(struct xt_connlimit_conn),
- 0, 0, NULL);
- if (!connlimit_conn_cachep)
- return -ENOMEM;
-
- connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb",
- sizeof(struct xt_connlimit_rb),
- 0, 0, NULL);
- if (!connlimit_rb_cachep) {
- kmem_cache_destroy(connlimit_conn_cachep);
- return -ENOMEM;
- }
- ret = xt_register_match(&connlimit_mt_reg);
- if (ret != 0) {
- kmem_cache_destroy(connlimit_conn_cachep);
- kmem_cache_destroy(connlimit_rb_cachep);
- }
- return ret;
+ return xt_register_match(&connlimit_mt_reg);
}
static void __exit connlimit_mt_exit(void)
{
xt_unregister_match(&connlimit_mt_reg);
- kmem_cache_destroy(connlimit_conn_cachep);
- kmem_cache_destroy(connlimit_rb_cachep);
}
module_init(connlimit_mt_init);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 5da8746f7b88..ca6847403ca2 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -353,7 +353,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
static bool select_all(const struct xt_hashlimit_htable *ht,
const struct dsthash_ent *he)
{
- return 1;
+ return true;
}
static bool select_gc(const struct xt_hashlimit_htable *ht,
@@ -1266,7 +1266,6 @@ static int dl_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations dl_file_ops_v2 = {
- .owner = THIS_MODULE,
.open = dl_proc_open_v2,
.read = seq_read,
.llseek = seq_lseek,
@@ -1274,7 +1273,6 @@ static const struct file_operations dl_file_ops_v2 = {
};
static const struct file_operations dl_file_ops_v1 = {
- .owner = THIS_MODULE,
.open = dl_proc_open_v1,
.read = seq_read,
.llseek = seq_lseek,
@@ -1282,7 +1280,6 @@ static const struct file_operations dl_file_ops_v1 = {
};
static const struct file_operations dl_file_ops = {
- .owner = THIS_MODULE,
.open = dl_proc_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c
index 000e70377f85..7ca64a50db04 100644
--- a/net/netfilter/xt_ipcomp.c
+++ b/net/netfilter/xt_ipcomp.c
@@ -58,7 +58,7 @@ static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par)
*/
pr_debug("Dropping evil IPComp tinygram.\n");
par->hotdrop = true;
- return 0;
+ return false;
}
return spi_match(compinfo->spis[0], compinfo->spis[1],
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index d27b5f1ea619..61403b77361c 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -193,9 +193,8 @@ static struct xt_match limit_mt_reg __read_mostly = {
.compatsize = sizeof(struct compat_xt_rateinfo),
.compat_from_user = limit_mt_compat_from_user,
.compat_to_user = limit_mt_compat_to_user,
-#else
- .usersize = offsetof(struct xt_rateinfo, prev),
#endif
+ .usersize = offsetof(struct xt_rateinfo, prev),
.me = THIS_MODULE,
};
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index cc0518fe598e..6f92d25590a8 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -62,6 +62,7 @@ static struct xt_match nfacct_mt_reg __read_mostly = {
.match = nfacct_mt,
.destroy = nfacct_mt_destroy,
.matchsize = sizeof(struct xt_nfacct_match_info),
+ .usersize = offsetof(struct xt_nfacct_match_info, nfacct),
.me = THIS_MODULE,
};
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 36e14b1f061d..a34f314a8c23 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/capability.h>
#include <linux/if.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
@@ -70,6 +71,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
struct xt_osf_finger *kf = NULL, *sf;
int err = 0;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!osf_attrs[OSF_ATTR_FINGER])
return -EINVAL;
@@ -115,6 +119,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
struct xt_osf_finger *sf;
int err = -ENOENT;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!osf_attrs[OSF_ATTR_FINGER])
return -EINVAL;
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 2b4ab189bba7..5639fb03bdd9 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -93,7 +93,8 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
if (dst->xfrm == NULL)
return -1;
- for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
+ for (i = 0; dst && dst->xfrm;
+ dst = ((struct xfrm_dst *)dst)->child, i++) {
pos = strict ? i : 0;
if (pos >= info->len)
return 0;
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 64285702afd5..16b6b11ee83f 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -39,13 +39,17 @@ match_set(ip_set_id_t index, const struct sk_buff *skb,
return inv;
}
-#define ADT_OPT(n, f, d, fs, cfs, t) \
-struct ip_set_adt_opt n = { \
- .family = f, \
- .dim = d, \
- .flags = fs, \
- .cmdflags = cfs, \
- .ext.timeout = t, \
+#define ADT_OPT(n, f, d, fs, cfs, t, p, b, po, bo) \
+struct ip_set_adt_opt n = { \
+ .family = f, \
+ .dim = d, \
+ .flags = fs, \
+ .cmdflags = cfs, \
+ .ext.timeout = t, \
+ .ext.packets = p, \
+ .ext.bytes = b, \
+ .ext.packets_op = po, \
+ .ext.bytes_op = bo, \
}
/* Revision 0 interface: backward compatible with netfilter/iptables */
@@ -56,7 +60,8 @@ set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_set_info_match_v0 *info = par->matchinfo;
ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim,
- info->match_set.u.compat.flags, 0, UINT_MAX);
+ info->match_set.u.compat.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
return match_set(info->match_set.index, skb, par, &opt,
info->match_set.u.compat.flags & IPSET_INV_MATCH);
@@ -119,7 +124,8 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_set_info_match_v1 *info = par->matchinfo;
ADT_OPT(opt, xt_family(par), info->match_set.dim,
- info->match_set.flags, 0, UINT_MAX);
+ info->match_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
if (opt.flags & IPSET_RETURN_NOMATCH)
opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH;
@@ -161,45 +167,21 @@ set_match_v1_destroy(const struct xt_mtdtor_param *par)
/* Revision 3 match */
static bool
-match_counter0(u64 counter, const struct ip_set_counter_match0 *info)
-{
- switch (info->op) {
- case IPSET_COUNTER_NONE:
- return true;
- case IPSET_COUNTER_EQ:
- return counter == info->value;
- case IPSET_COUNTER_NE:
- return counter != info->value;
- case IPSET_COUNTER_LT:
- return counter < info->value;
- case IPSET_COUNTER_GT:
- return counter > info->value;
- }
- return false;
-}
-
-static bool
set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v3 *info = par->matchinfo;
- int ret;
ADT_OPT(opt, xt_family(par), info->match_set.dim,
- info->match_set.flags, info->flags, UINT_MAX);
+ info->match_set.flags, info->flags, UINT_MAX,
+ info->packets.value, info->bytes.value,
+ info->packets.op, info->bytes.op);
if (info->packets.op != IPSET_COUNTER_NONE ||
info->bytes.op != IPSET_COUNTER_NONE)
opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
- ret = match_set(info->match_set.index, skb, par, &opt,
- info->match_set.flags & IPSET_INV_MATCH);
-
- if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
- return ret;
-
- if (!match_counter0(opt.ext.packets, &info->packets))
- return false;
- return match_counter0(opt.ext.bytes, &info->bytes);
+ return match_set(info->match_set.index, skb, par, &opt,
+ info->match_set.flags & IPSET_INV_MATCH);
}
#define set_match_v3_checkentry set_match_v1_checkentry
@@ -208,45 +190,21 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
/* Revision 4 match */
static bool
-match_counter(u64 counter, const struct ip_set_counter_match *info)
-{
- switch (info->op) {
- case IPSET_COUNTER_NONE:
- return true;
- case IPSET_COUNTER_EQ:
- return counter == info->value;
- case IPSET_COUNTER_NE:
- return counter != info->value;
- case IPSET_COUNTER_LT:
- return counter < info->value;
- case IPSET_COUNTER_GT:
- return counter > info->value;
- }
- return false;
-}
-
-static bool
set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v4 *info = par->matchinfo;
- int ret;
ADT_OPT(opt, xt_family(par), info->match_set.dim,
- info->match_set.flags, info->flags, UINT_MAX);
+ info->match_set.flags, info->flags, UINT_MAX,
+ info->packets.value, info->bytes.value,
+ info->packets.op, info->bytes.op);
if (info->packets.op != IPSET_COUNTER_NONE ||
info->bytes.op != IPSET_COUNTER_NONE)
opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
- ret = match_set(info->match_set.index, skb, par, &opt,
- info->match_set.flags & IPSET_INV_MATCH);
-
- if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
- return ret;
-
- if (!match_counter(opt.ext.packets, &info->packets))
- return false;
- return match_counter(opt.ext.bytes, &info->bytes);
+ return match_set(info->match_set.index, skb, par, &opt,
+ info->match_set.flags & IPSET_INV_MATCH);
}
#define set_match_v4_checkentry set_match_v1_checkentry
@@ -260,9 +218,11 @@ set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
const struct xt_set_info_target_v0 *info = par->targinfo;
ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim,
- info->add_set.u.compat.flags, 0, UINT_MAX);
+ info->add_set.u.compat.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim,
- info->del_set.u.compat.flags, 0, UINT_MAX);
+ info->del_set.u.compat.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_add(info->add_set.index, skb, par, &add_opt);
@@ -333,9 +293,11 @@ set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
const struct xt_set_info_target_v1 *info = par->targinfo;
ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
- info->add_set.flags, 0, UINT_MAX);
+ info->add_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
- info->del_set.flags, 0, UINT_MAX);
+ info->del_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_add(info->add_set.index, skb, par, &add_opt);
@@ -402,9 +364,11 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
const struct xt_set_info_target_v2 *info = par->targinfo;
ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
- info->add_set.flags, info->flags, info->timeout);
+ info->add_set.flags, info->flags, info->timeout,
+ 0, 0, 0, 0);
ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
- info->del_set.flags, 0, UINT_MAX);
+ info->del_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
/* Normalize to fit into jiffies */
if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
@@ -432,11 +396,14 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
int ret;
ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
- info->add_set.flags, info->flags, info->timeout);
+ info->add_set.flags, info->flags, info->timeout,
+ 0, 0, 0, 0);
ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
- info->del_set.flags, 0, UINT_MAX);
+ info->del_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
ADT_OPT(map_opt, xt_family(par), info->map_set.dim,
- info->map_set.flags, 0, UINT_MAX);
+ info->map_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
/* Normalize to fit into jiffies */
if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 11de55e7a868..8710fdba2ae2 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -84,6 +84,7 @@ static struct xt_match xt_statistic_mt_reg __read_mostly = {
.checkentry = statistic_mt_check,
.destroy = statistic_mt_destroy,
.matchsize = sizeof(struct xt_statistic_info),
+ .usersize = offsetof(struct xt_statistic_info, master),
.me = THIS_MODULE,
};
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b9e0ee4e22f5..2ad445c1d27c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -65,6 +65,7 @@
#include <linux/net_namespace.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>
@@ -145,8 +146,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
static BLOCKING_NOTIFIER_HEAD(netlink_chain);
-static DEFINE_SPINLOCK(netlink_tap_lock);
-static struct list_head netlink_tap_all __read_mostly;
static const struct rhashtable_params netlink_rhashtable_params;
@@ -173,14 +172,24 @@ static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
return new;
}
+static unsigned int netlink_tap_net_id;
+
+struct netlink_tap_net {
+ struct list_head netlink_tap_all;
+ struct mutex netlink_tap_lock;
+};
+
int netlink_add_tap(struct netlink_tap *nt)
{
+ struct net *net = dev_net(nt->dev);
+ struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
+
if (unlikely(nt->dev->type != ARPHRD_NETLINK))
return -EINVAL;
- spin_lock(&netlink_tap_lock);
- list_add_rcu(&nt->list, &netlink_tap_all);
- spin_unlock(&netlink_tap_lock);
+ mutex_lock(&nn->netlink_tap_lock);
+ list_add_rcu(&nt->list, &nn->netlink_tap_all);
+ mutex_unlock(&nn->netlink_tap_lock);
__module_get(nt->module);
@@ -190,12 +199,14 @@ EXPORT_SYMBOL_GPL(netlink_add_tap);
static int __netlink_remove_tap(struct netlink_tap *nt)
{
+ struct net *net = dev_net(nt->dev);
+ struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
bool found = false;
struct netlink_tap *tmp;
- spin_lock(&netlink_tap_lock);
+ mutex_lock(&nn->netlink_tap_lock);
- list_for_each_entry(tmp, &netlink_tap_all, list) {
+ list_for_each_entry(tmp, &nn->netlink_tap_all, list) {
if (nt == tmp) {
list_del_rcu(&nt->list);
found = true;
@@ -205,7 +216,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt)
pr_warn("__netlink_remove_tap: %p not found\n", nt);
out:
- spin_unlock(&netlink_tap_lock);
+ mutex_unlock(&nn->netlink_tap_lock);
if (found)
module_put(nt->module);
@@ -224,6 +235,26 @@ int netlink_remove_tap(struct netlink_tap *nt)
}
EXPORT_SYMBOL_GPL(netlink_remove_tap);
+static __net_init int netlink_tap_init_net(struct net *net)
+{
+ struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
+
+ INIT_LIST_HEAD(&nn->netlink_tap_all);
+ mutex_init(&nn->netlink_tap_lock);
+ return 0;
+}
+
+static void __net_exit netlink_tap_exit_net(struct net *net)
+{
+}
+
+static struct pernet_operations netlink_tap_net_ops = {
+ .init = netlink_tap_init_net,
+ .exit = netlink_tap_exit_net,
+ .id = &netlink_tap_net_id,
+ .size = sizeof(struct netlink_tap_net),
+};
+
static bool netlink_filter_tap(const struct sk_buff *skb)
{
struct sock *sk = skb->sk;
@@ -253,6 +284,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
struct sock *sk = skb->sk;
int ret = -ENOMEM;
+ if (!net_eq(dev_net(dev), sock_net(sk)))
+ return 0;
+
dev_hold(dev);
if (is_vmalloc_addr(skb->head))
@@ -274,7 +308,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
return ret;
}
-static void __netlink_deliver_tap(struct sk_buff *skb)
+static void __netlink_deliver_tap(struct sk_buff *skb, struct netlink_tap_net *nn)
{
int ret;
struct netlink_tap *tmp;
@@ -282,19 +316,21 @@ static void __netlink_deliver_tap(struct sk_buff *skb)
if (!netlink_filter_tap(skb))
return;
- list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
+ list_for_each_entry_rcu(tmp, &nn->netlink_tap_all, list) {
ret = __netlink_deliver_tap_skb(skb, tmp->dev);
if (unlikely(ret))
break;
}
}
-static void netlink_deliver_tap(struct sk_buff *skb)
+static void netlink_deliver_tap(struct net *net, struct sk_buff *skb)
{
+ struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id);
+
rcu_read_lock();
- if (unlikely(!list_empty(&netlink_tap_all)))
- __netlink_deliver_tap(skb);
+ if (unlikely(!list_empty(&nn->netlink_tap_all)))
+ __netlink_deliver_tap(skb, nn);
rcu_read_unlock();
}
@@ -303,7 +339,7 @@ static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src,
struct sk_buff *skb)
{
if (!(netlink_is_kernel(dst) && netlink_is_kernel(src)))
- netlink_deliver_tap(skb);
+ netlink_deliver_tap(sock_net(dst), skb);
}
static void netlink_overrun(struct sock *sk)
@@ -1213,7 +1249,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
int len = skb->len;
- netlink_deliver_tap(skb);
+ netlink_deliver_tap(sock_net(sk), skb);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk);
@@ -2381,13 +2417,14 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
struct nlmsghdr *,
struct netlink_ext_ack *))
{
- struct netlink_ext_ack extack = {};
+ struct netlink_ext_ack extack;
struct nlmsghdr *nlh;
int err;
while (skb->len >= nlmsg_total_size(0)) {
int msglen;
+ memset(&extack, 0, sizeof(extack));
nlh = nlmsg_hdr(skb);
err = 0;
@@ -2478,8 +2515,9 @@ static int netlink_walk_start(struct nl_seq_iter *iter)
return err;
}
- err = rhashtable_walk_start(&iter->hti);
- return err == -EAGAIN ? 0 : err;
+ rhashtable_walk_start(&iter->hti);
+
+ return 0;
}
static void netlink_walk_stop(struct nl_seq_iter *iter)
@@ -2600,7 +2638,6 @@ static int netlink_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations netlink_seq_fops = {
- .owner = THIS_MODULE,
.open = netlink_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -2730,12 +2767,11 @@ static int __init netlink_proto_init(void)
}
}
- INIT_LIST_HEAD(&netlink_tap_all);
-
netlink_add_usersock_entry();
sock_register(&netlink_family_ops);
register_pernet_subsys(&netlink_net_ops);
+ register_pernet_subsys(&netlink_tap_net_ops);
/* The netlink device handler may be needed early. */
rtnetlink_init();
out:
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 8faa20b4d457..7dda33b9b784 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -115,11 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
if (!s_num)
rhashtable_walk_enter(&tbl->hash, hti);
- ret = rhashtable_walk_start(hti);
- if (ret == -EAGAIN)
- ret = 0;
- if (ret)
- goto stop;
+ rhashtable_walk_start(hti);
while ((nlsk = rhashtable_walk_next(hti))) {
if (IS_ERR(nlsk)) {
@@ -146,8 +142,8 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
}
}
-stop:
rhashtable_walk_stop(hti);
+
if (ret)
goto done;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 7ed9d4422a73..9ba30c63be3d 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1344,7 +1344,6 @@ static int nr_info_open(struct inode *inode, struct file *file)
}
static const struct file_operations nr_info_fops = {
- .owner = THIS_MODULE,
.open = nr_info_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 75e6ba970fde..b5a7dcb30991 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -901,7 +901,6 @@ static int nr_node_info_open(struct inode *inode, struct file *file)
}
const struct file_operations nr_nodes_fops = {
- .owner = THIS_MODULE,
.open = nr_node_info_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -968,7 +967,6 @@ static int nr_neigh_info_open(struct inode *inode, struct file *file)
}
const struct file_operations nr_neigh_fops = {
- .owner = THIS_MODULE,
.open = nr_neigh_info_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index fb7afcaa3004..985909f105eb 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -531,7 +531,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
return 0;
}
-static inline unsigned int llcp_accept_poll(struct sock *parent)
+static inline __poll_t llcp_accept_poll(struct sock *parent)
{
struct nfc_llcp_sock *llcp_sock, *parent_sock;
struct sock *sk;
@@ -549,11 +549,11 @@ static inline unsigned int llcp_accept_poll(struct sock *parent)
return 0;
}
-static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
+static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask = 0;
+ __poll_t mask = 0;
pr_debug("%p\n", sk);
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 8d104c1db628..a66f102c6c01 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -305,7 +305,7 @@ static ssize_t nci_uart_tty_write(struct tty_struct *tty, struct file *file,
return 0;
}
-static unsigned int nci_uart_tty_poll(struct tty_struct *tty,
+static __poll_t nci_uart_tty_poll(struct tty_struct *tty,
struct file *filp, poll_table *wait)
{
return 0;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index b27c5c6d9cab..c5904f629091 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1098,6 +1098,36 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
return 0;
}
+/* Trim the skb to the length specified by the IP/IPv6 header,
+ * removing any trailing lower-layer padding. This prepares the skb
+ * for higher-layer processing that assumes skb->len excludes padding
+ * (such as nf_ip_checksum). The caller needs to pull the skb to the
+ * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
+ */
+static int ovs_skb_network_trim(struct sk_buff *skb)
+{
+ unsigned int len;
+ int err;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ len = ntohs(ip_hdr(skb)->tot_len);
+ break;
+ case htons(ETH_P_IPV6):
+ len = sizeof(struct ipv6hdr)
+ + ntohs(ipv6_hdr(skb)->payload_len);
+ break;
+ default:
+ len = skb->len;
+ }
+
+ err = pskb_trim_rcsum(skb, len);
+ if (err)
+ kfree_skb(skb);
+
+ return err;
+}
+
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
* value if 'skb' is freed.
*/
@@ -1112,6 +1142,10 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
nh_ofs = skb_network_offset(skb);
skb_pull_rcsum(skb, nh_ofs);
+ err = ovs_skb_network_trim(skb);
+ if (err)
+ return err;
+
if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
err = handle_fragments(net, key, info->zone.id, skb);
if (err)
@@ -1266,14 +1300,14 @@ static int parse_nat(const struct nlattr *attr,
/* Do not allow flags if no type is given. */
if (info->range.flags) {
OVS_NLERR(log,
- "NAT flags may be given only when NAT range (SRC or DST) is also specified.\n"
+ "NAT flags may be given only when NAT range (SRC or DST) is also specified."
);
return -EINVAL;
}
info->nat = OVS_CT_NAT; /* NAT existing connections. */
} else if (!info->commit) {
OVS_NLERR(log,
- "NAT attributes may be specified only when CT COMMIT flag is also specified.\n"
+ "NAT attributes may be specified only when CT COMMIT flag is also specified."
);
return -EINVAL;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 99cfafc2a139..ef38e5aecd28 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -308,7 +308,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
const struct dp_upcall_info *upcall_info,
uint32_t cutlen)
{
- unsigned short gso_type = skb_shinfo(skb)->gso_type;
+ unsigned int gso_type = skb_shinfo(skb)->gso_type;
struct sw_flow_key later_key;
struct sk_buff *segs, *nskb;
int err;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dbe2379329c5..56b8e7167790 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -56,12 +56,12 @@
u64 ovs_flow_used_time(unsigned long flow_jiffies)
{
- struct timespec cur_ts;
+ struct timespec64 cur_ts;
u64 cur_ms, idle_ms;
- ktime_get_ts(&cur_ts);
+ ktime_get_ts64(&cur_ts);
idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
- cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
+ cur_ms = (u64)(u32)cur_ts.tv_sec * MSEC_PER_SEC +
cur_ts.tv_nsec / NSEC_PER_MSEC;
return cur_ms - idle_ms;
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
return -EINVAL;
skb_reset_network_header(skb);
+ key->eth.type = skb->protocol;
} else {
eth = eth_hdr(skb);
ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
if (unlikely(parse_vlan(skb, key)))
return -ENOMEM;
- skb->protocol = parse_ethertype(skb);
- if (unlikely(skb->protocol == htons(0)))
+ key->eth.type = parse_ethertype(skb);
+ if (unlikely(key->eth.type == htons(0)))
return -ENOMEM;
+ /* Multiple tagged packets need to retain TPID to satisfy
+ * skb_vlan_pop(), which will later shift the ethertype into
+ * skb->protocol.
+ */
+ if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
+ skb->protocol = key->eth.cvlan.tpid;
+ else
+ skb->protocol = key->eth.type;
+
skb_reset_network_header(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));
}
skb_reset_mac_len(skb);
- key->eth.type = skb->protocol;
/* Network layer. */
if (key->eth.type == htons(ETH_P_IP)) {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index dc424798ba6f..7322aa1e382e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -330,12 +330,12 @@ size_t ovs_tun_key_attr_size(void)
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
- /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
+ /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
+ * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS is mutually exclusive with
* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
*/
+ nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
- + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */
- + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
+ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
}
static size_t ovs_nsh_key_attr_size(void)
@@ -402,7 +402,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
.next = ovs_vxlan_ext_key_lens },
[OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
- [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) },
+ [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = OVS_ATTR_VARIABLE },
};
static const struct ovs_len_tbl
@@ -634,30 +634,30 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
return 0;
}
-static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
+static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
struct sw_flow_match *match, bool is_mask,
bool log)
{
unsigned long opt_key_offset;
- struct erspan_metadata opts;
- BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
-
- memset(&opts, 0, sizeof(opts));
- opts.index = nla_get_be32(attr);
+ BUILD_BUG_ON(sizeof(struct erspan_metadata) >
+ sizeof(match->key->tun_opts));
- /* Index has only 20-bit */
- if (ntohl(opts.index) & ~INDEX_MASK) {
- OVS_NLERR(log, "ERSPAN index number %x too large.",
- ntohl(opts.index));
+ if (nla_len(a) > sizeof(match->key->tun_opts)) {
+ OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
+ nla_len(a), sizeof(match->key->tun_opts));
return -EINVAL;
}
- SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
- opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
- SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
- is_mask);
+ if (!is_mask)
+ SW_FLOW_KEY_PUT(match, tun_opts_len,
+ sizeof(struct erspan_metadata), false);
+ else
+ SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+ opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
+ nla_len(a), is_mask);
return 0;
}
@@ -774,7 +774,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
return -EINVAL;
}
- err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
+ err = erspan_tun_opt_from_nlattr(a, match, is_mask,
+ log);
if (err)
return err;
@@ -906,8 +907,8 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
- nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
- ((struct erspan_metadata *)tun_opts)->index))
+ nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
+ swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
}
@@ -2241,14 +2242,11 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
#define MAX_ACTIONS_BUFSIZE (32 * 1024)
-static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
+static struct sw_flow_actions *nla_alloc_flow_actions(int size)
{
struct sw_flow_actions *sfa;
- if (size > MAX_ACTIONS_BUFSIZE) {
- OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
- return ERR_PTR(-EINVAL);
- }
+ WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
if (!sfa)
@@ -2321,12 +2319,15 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
new_acts_size = ksize(*sfa) * 2;
if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
- if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
+ if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
+ OVS_NLERR(log, "Flow action size exceeds max %u",
+ MAX_ACTIONS_BUFSIZE);
return ERR_PTR(-EMSGSIZE);
+ }
new_acts_size = MAX_ACTIONS_BUFSIZE;
}
- acts = nla_alloc_flow_actions(new_acts_size, log);
+ acts = nla_alloc_flow_actions(new_acts_size);
if (IS_ERR(acts))
return (void *)acts;
@@ -2501,7 +2502,7 @@ static int validate_geneve_opts(struct sw_flow_key *key)
option = (struct geneve_opt *)((u8 *)option + len);
opts_len -= len;
- };
+ }
key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
@@ -2536,7 +2537,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
break;
}
- };
+ }
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
if (start < 0)
@@ -3059,7 +3060,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
{
int err;
- *sfa = nla_alloc_flow_actions(nla_len(attr), log);
+ *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 3fbfc78991ac..04b94281a30b 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -488,7 +488,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
long long int max_bucket_size;
band = &meter->bands[i];
- max_bucket_size = (band->burst_size + band->rate) * 1000;
+ max_bucket_size = (band->burst_size + band->rate) * 1000LL;
band->bucket += delta_ms * band->rate;
if (band->bucket > max_bucket_size)
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 04a3128adcf0..bb95c43aae76 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -16,7 +16,6 @@
* 02110-1301, USA
*/
-#include <linux/hardirq.h>
#include <linux/if_vlan.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
@@ -126,18 +125,12 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
}
}
-static void internal_set_rx_headroom(struct net_device *dev, int new_hr)
-{
- dev->needed_headroom = new_hr < 0 ? 0 : new_hr;
-}
-
static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_open = internal_dev_open,
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_get_stats64 = internal_get_stats,
- .ndo_set_rx_headroom = internal_set_rx_headroom,
};
static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -154,7 +147,7 @@ static void do_setup(struct net_device *netdev)
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
- IFF_PHONY_HEADROOM | IFF_NO_QUEUE;
+ IFF_NO_QUEUE;
netdev->needs_free_netdev = true;
netdev->priv_destructor = internal_dev_destructor;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
@@ -195,7 +188,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
err = -ENOMEM;
goto error_free_netdev;
}
- vport->dev->needed_headroom = vport->dp->max_headroom;
dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
internal_dev = internal_dev_priv(vport->dev);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 737092ca9b4e..1d1483007e46 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -247,12 +247,13 @@ static int packet_direct_xmit(struct sk_buff *skb)
struct sk_buff *orig_skb = skb;
struct netdev_queue *txq;
int ret = NETDEV_TX_BUSY;
+ bool again = false;
if (unlikely(!netif_running(dev) ||
!netif_carrier_ok(dev)))
goto drop;
- skb = validate_xmit_skb_list(skb, dev);
+ skb = validate_xmit_skb_list(skb, dev, &again);
if (skb != orig_skb)
goto drop;
@@ -1687,7 +1688,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
atomic_long_set(&rollover->num, 0);
atomic_long_set(&rollover->num_huge, 0);
atomic_long_set(&rollover->num_failed, 0);
- po->rollover = rollover;
}
if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
@@ -1745,6 +1745,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
__dev_remove_pack(&po->prot_hook);
po->fanout = match;
+ po->rollover = rollover;
+ rollover = NULL;
refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
__fanout_link(sk, po);
err = 0;
@@ -1758,10 +1760,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
}
out:
- if (err && rollover) {
- kfree_rcu(rollover, rcu);
- po->rollover = NULL;
- }
+ kfree(rollover);
mutex_unlock(&fanout_mutex);
return err;
}
@@ -1785,11 +1784,6 @@ static struct packet_fanout *fanout_release(struct sock *sk)
list_del(&f->list);
else
f = NULL;
-
- if (po->rollover) {
- kfree_rcu(po->rollover, rcu);
- po->rollover = NULL;
- }
}
mutex_unlock(&fanout_mutex);
@@ -3029,6 +3023,7 @@ static int packet_release(struct socket *sock)
synchronize_net();
if (f) {
+ kfree(po->rollover);
fanout_release_data(f);
kfree(f);
}
@@ -3097,6 +3092,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
if (need_rehook) {
if (po->running) {
rcu_read_unlock();
+ /* prevents packet_notifier() from calling
+ * register_prot_hook()
+ */
+ po->num = 0;
__unregister_prot_hook(sk, true);
rcu_read_lock();
dev_curr = po->prot_hook.dev;
@@ -3105,6 +3104,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
dev->ifindex);
}
+ BUG_ON(po->running);
po->num = proto;
po->prot_hook.type = proto;
@@ -3843,7 +3843,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
void *data = &val;
union tpacket_stats_u st;
struct tpacket_rollover_stats rstats;
- struct packet_rollover *rollover;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3922,18 +3921,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
0);
break;
case PACKET_ROLLOVER_STATS:
- rcu_read_lock();
- rollover = rcu_dereference(po->rollover);
- if (rollover) {
- rstats.tp_all = atomic_long_read(&rollover->num);
- rstats.tp_huge = atomic_long_read(&rollover->num_huge);
- rstats.tp_failed = atomic_long_read(&rollover->num_failed);
- data = &rstats;
- lv = sizeof(rstats);
- }
- rcu_read_unlock();
- if (!rollover)
+ if (!po->rollover)
return -EINVAL;
+ rstats.tp_all = atomic_long_read(&po->rollover->num);
+ rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
+ rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
+ data = &rstats;
+ lv = sizeof(rstats);
break;
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
@@ -4080,12 +4074,12 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
return 0;
}
-static unsigned int packet_poll(struct file *file, struct socket *sock,
+static __poll_t packet_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
- unsigned int mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll(file, sock, wait);
spin_lock_bh(&sk->sk_receive_queue.lock);
if (po->rx_ring.pg_vec) {
@@ -4537,7 +4531,6 @@ static int packet_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations packet_seq_fops = {
- .owner = THIS_MODULE,
.open = packet_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 562fbc155006..a1d2b2319ae9 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -95,7 +95,6 @@ struct packet_fanout {
struct packet_rollover {
int sock;
- struct rcu_head rcu;
atomic_long_t num;
atomic_long_t num_huge;
atomic_long_t num_failed;
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index da754fc926e7..871eaf2cb85e 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -299,16 +299,21 @@ out:
int __init phonet_netlink_register(void)
{
- int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit,
- NULL, 0);
+ int err = rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWADDR,
+ addr_doit, NULL, 0);
if (err)
return err;
- /* Further __rtnl_register() cannot fail */
- __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0);
- __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0);
- __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0);
- __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0);
- __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, 0);
+ /* Further rtnl_register_module() cannot fail */
+ rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELADDR,
+ addr_doit, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETADDR,
+ NULL, getaddr_dumpit, 0);
+ rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWROUTE,
+ route_doit, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELROUTE,
+ route_doit, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETROUTE,
+ NULL, route_dumpit, 0);
return 0;
}
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 1b050dd17393..08f6751d2030 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -341,12 +341,12 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
return 0;
}
-static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
+static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct pep_sock *pn = pep_sk(sk);
- unsigned int mask = 0;
+ __poll_t mask = 0;
poll_wait(file, sk_sleep(sk), wait);
@@ -635,7 +635,6 @@ static int pn_sock_open(struct inode *inode, struct file *file)
}
const struct file_operations pn_sock_seq_fops = {
- .owner = THIS_MODULE,
.open = pn_sock_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -818,7 +817,6 @@ static int pn_res_open(struct inode *inode, struct file *file)
}
const struct file_operations pn_res_seq_fops = {
- .owner = THIS_MODULE,
.open = pn_res_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 77ab05e23001..5fb3929e3d7d 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1116,9 +1116,13 @@ static int __init qrtr_proto_init(void)
return rc;
}
- rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0);
+ rc = rtnl_register_module(THIS_MODULE, PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0);
+ if (rc) {
+ sock_unregister(qrtr_family.family);
+ proto_unregister(&qrtr_proto);
+ }
- return 0;
+ return rc;
}
postcore_initcall(qrtr_proto_init);
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index b405f77d664c..88aa8ad0f5b6 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -152,12 +152,12 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
* to send to a congested destination, the system call may still fail (and
* return ENOBUFS).
*/
-static unsigned int rds_poll(struct file *file, struct socket *sock,
+static __poll_t rds_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct rds_sock *rs = rds_sk_to_rs(sk);
- unsigned int mask = 0;
+ __poll_t mask = 0;
unsigned long flags;
poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 75d43dc8e96b..5aa3a64aa4f0 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -114,6 +114,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
rs, &addr, (int)ntohs(*port));
break;
} else {
+ rs->rs_bound_addr = 0;
rds_sock_put(rs);
ret = -ENOMEM;
break;
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 8398fee7c866..8d19fd25dce3 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -219,7 +219,11 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
spin_lock_irqsave(&rds_cong_lock, flags);
list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
- if (!test_and_set_bit(0, &conn->c_map_queued)) {
+ struct rds_conn_path *cp = &conn->c_path[0];
+
+ rcu_read_lock();
+ if (!test_and_set_bit(0, &conn->c_map_queued) &&
+ !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
rds_stats_inc(s_cong_update_queued);
/* We cannot inline the call to rds_send_xmit() here
* for two reasons (both pertaining to a TCP transport):
@@ -235,9 +239,9 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
* therefore trigger warnings.
* Defer the xmit to rds_send_worker() instead.
*/
- queue_delayed_work(rds_wq,
- &conn->c_path[0].cp_send_w, 0);
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
}
+ rcu_read_unlock();
}
spin_unlock_irqrestore(&rds_cong_lock, flags);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7ee2d5d68b78..b10c0ef36d8d 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -230,8 +230,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
conn, &laddr, &faddr,
- trans->t_name ? trans->t_name : "[unknown]",
- is_outgoing ? "(outgoing)" : "");
+ strnlen(trans->t_name, sizeof(trans->t_name)) ? trans->t_name :
+ "[unknown]", is_outgoing ? "(outgoing)" : "");
/*
* Since we ran without holding the conn lock, someone could
@@ -382,10 +382,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
{
struct rds_message *rm, *rtmp;
+ set_bit(RDS_DESTROY_PENDING, &cp->cp_flags);
+
if (!cp->cp_transport_data)
return;
/* make sure lingering queued work won't try to ref the conn */
+ synchronize_rcu();
cancel_delayed_work_sync(&cp->cp_send_w);
cancel_delayed_work_sync(&cp->cp_recv_w);
@@ -403,6 +406,11 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
if (cp->cp_xmit_rm)
rds_message_put(cp->cp_xmit_rm);
+ WARN_ON(delayed_work_pending(&cp->cp_send_w));
+ WARN_ON(delayed_work_pending(&cp->cp_recv_w));
+ WARN_ON(delayed_work_pending(&cp->cp_conn_w));
+ WARN_ON(work_pending(&cp->cp_down_w));
+
cp->cp_conn->c_trans->conn_free(cp->cp_transport_data);
}
@@ -424,7 +432,6 @@ void rds_conn_destroy(struct rds_connection *conn)
"%pI4\n", conn, &conn->c_laddr,
&conn->c_faddr);
- conn->c_destroy_in_prog = 1;
/* Ensure conn will not be scheduled for reconnect */
spin_lock_irq(&rds_conn_lock);
hlist_del_init_rcu(&conn->c_hash_node);
@@ -445,7 +452,6 @@ void rds_conn_destroy(struct rds_connection *conn)
*/
rds_cong_remove_conn(conn);
- put_net(conn->c_net);
kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
@@ -684,10 +690,13 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
{
atomic_set(&cp->cp_state, RDS_CONN_ERROR);
- if (!destroy && cp->cp_conn->c_destroy_in_prog)
+ rcu_read_lock();
+ if (!destroy && test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ rcu_read_unlock();
return;
-
+ }
queue_work(rds_wq, &cp->cp_down_w);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rds_conn_path_drop);
@@ -704,9 +713,15 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
*/
void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
{
+ rcu_read_lock();
+ if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ rcu_read_unlock();
+ return;
+ }
if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
!test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 36dd2099048a..b2a5067b4afe 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -301,13 +301,11 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
if (rds_conn_state(conn) == RDS_CONN_UP) {
struct rds_ib_device *rds_ibdev;
- struct rdma_dev_addr *dev_addr;
ic = conn->c_transport_data;
- dev_addr = &ic->i_cm_id->route.addr.dev_addr;
- rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
- rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
+ rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
+ (union ib_gid *)&iinfo->dst_gid);
rds_ibdev = ic->rds_ibdev;
iinfo->max_send_wr = ic->i_send_ring.w_nr;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 8886f15abe90..634cfcb7bba6 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
long i;
int ret;
- if (rs->rs_bound_addr == 0) {
+ if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
@@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+ if (args->nr_local == 0)
+ return -EINVAL;
+
/* figure out the number of pages in the vector */
for (i = 0; i < args->nr_local; i++) {
if (copy_from_user(&vec, &local_vec[i],
@@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
err:
if (page)
put_page(page);
+ rm->atomic.op_active = 0;
kfree(rm->atomic.op_notifier);
return ret;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c349c71babff..374ae83b60d4 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -88,6 +88,7 @@ enum {
#define RDS_RECONNECT_PENDING 1
#define RDS_IN_XMIT 2
#define RDS_RECV_REFILL 3
+#define RDS_DESTROY_PENDING 4
/* Max number of multipaths per RDS connection. Must be a power of 2 */
#define RDS_MPATH_WORKERS 8
@@ -139,8 +140,7 @@ struct rds_connection {
__be32 c_faddr;
unsigned int c_loopback:1,
c_ping_triggered:1,
- c_destroy_in_prog:1,
- c_pad_to_32:29;
+ c_pad_to_32:30;
int c_npaths;
struct rds_connection *c_passive;
struct rds_transport *c_trans;
@@ -150,7 +150,7 @@ struct rds_connection {
/* Protocol version */
unsigned int c_version;
- struct net *c_net;
+ possible_net_t c_net;
struct list_head c_map_item;
unsigned long c_map_queued;
@@ -165,13 +165,13 @@ struct rds_connection {
static inline
struct net *rds_conn_net(struct rds_connection *conn)
{
- return conn->c_net;
+ return read_pnet(&conn->c_net);
}
static inline
void rds_conn_net_set(struct rds_connection *conn, struct net *net)
{
- conn->c_net = get_net(net);
+ write_pnet(&conn->c_net, net);
}
#define RDS_FLAG_CONG_BITMAP 0x01
diff --git a/net/rds/send.c b/net/rds/send.c
index b52cdc8ae428..d3e32d1f3c7d 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -162,6 +162,12 @@ restart:
goto out;
}
+ if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ release_in_xmit(cp);
+ ret = -ENETUNREACH; /* dont requeue send work */
+ goto out;
+ }
+
/*
* we record the send generation after doing the xmit acquire.
* if someone else manages to jump in and do some work, we'll use
@@ -437,7 +443,12 @@ over_batch:
!list_empty(&cp->cp_send_queue)) && !raced) {
if (batch_count < send_batch_count)
goto restart;
- queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+ rcu_read_lock();
+ if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ ret = -ENETUNREACH;
+ else
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+ rcu_read_unlock();
} else if (raced) {
rds_stats_inc(s_send_lock_queue_raced);
}
@@ -1009,6 +1020,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
continue;
if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
+ if (cmsg->cmsg_len <
+ CMSG_LEN(sizeof(struct rds_rdma_args)))
+ return -EINVAL;
args = CMSG_DATA(cmsg);
*rdma_bytes += args->remote_vec.bytes;
}
@@ -1148,6 +1162,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
else
cpath = &conn->c_path[0];
+ if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
rds_conn_path_connect_if_down(cpath);
ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
@@ -1187,9 +1206,17 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
rds_stats_inc(s_send_queued);
ret = rds_send_xmit(cpath);
- if (ret == -ENOMEM || ret == -EAGAIN)
- queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
-
+ if (ret == -ENOMEM || ret == -EAGAIN) {
+ ret = 0;
+ rcu_read_lock();
+ if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags))
+ ret = -ENETUNREACH;
+ else
+ queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
+ rcu_read_unlock();
+ }
+ if (ret)
+ goto out;
rds_message_put(rm);
return payload_len;
@@ -1267,7 +1294,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
rds_stats_inc(s_send_pong);
/* schedule the send work on rds_wq */
- queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+ rcu_read_lock();
+ if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+ rcu_read_unlock();
rds_message_put(rm);
return 0;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 6b7ee71f40c6..9920d2f84eff 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -90,9 +90,10 @@ void rds_tcp_nonagle(struct socket *sock)
sizeof(val));
}
-u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
+u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
{
- return tcp_sk(tc->t_sock->sk)->snd_nxt;
+ /* seq# of the last byte of data in tcp send buffer */
+ return tcp_sk(tc->t_sock->sk)->write_seq;
}
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc)
@@ -270,16 +271,33 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
return -EADDRNOTAVAIL;
}
+static void rds_tcp_conn_free(void *arg)
+{
+ struct rds_tcp_connection *tc = arg;
+ unsigned long flags;
+
+ rdsdebug("freeing tc %p\n", tc);
+
+ spin_lock_irqsave(&rds_tcp_conn_lock, flags);
+ if (!tc->t_tcp_node_detached)
+ list_del(&tc->t_tcp_node);
+ spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
+
+ kmem_cache_free(rds_tcp_conn_slab, tc);
+}
+
static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
{
struct rds_tcp_connection *tc;
- int i;
+ int i, j;
+ int ret = 0;
for (i = 0; i < RDS_MPATH_WORKERS; i++) {
tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
- if (!tc)
- return -ENOMEM;
-
+ if (!tc) {
+ ret = -ENOMEM;
+ break;
+ }
mutex_init(&tc->t_conn_path_lock);
tc->t_sock = NULL;
tc->t_tinc = NULL;
@@ -290,26 +308,17 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
tc->t_cpath = &conn->c_path[i];
spin_lock_irq(&rds_tcp_conn_lock);
+ tc->t_tcp_node_detached = false;
list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
spin_unlock_irq(&rds_tcp_conn_lock);
rdsdebug("rds_conn_path [%d] tc %p\n", i,
conn->c_path[i].cp_transport_data);
}
-
- return 0;
-}
-
-static void rds_tcp_conn_free(void *arg)
-{
- struct rds_tcp_connection *tc = arg;
- unsigned long flags;
- rdsdebug("freeing tc %p\n", tc);
-
- spin_lock_irqsave(&rds_tcp_conn_lock, flags);
- list_del(&tc->t_tcp_node);
- spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
-
- kmem_cache_free(rds_tcp_conn_slab, tc);
+ if (ret) {
+ for (j = 0; j < i; j++)
+ rds_tcp_conn_free(conn->c_path[j].cp_transport_data);
+ }
+ return ret;
}
static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
@@ -495,27 +504,6 @@ static struct pernet_operations rds_tcp_net_ops = {
.size = sizeof(struct rds_tcp_net),
};
-/* explicitly send a RST on each socket, thereby releasing any socket refcnts
- * that may otherwise hold up netns deletion.
- */
-static void rds_tcp_conn_paths_destroy(struct rds_connection *conn)
-{
- struct rds_conn_path *cp;
- struct rds_tcp_connection *tc;
- int i;
- struct sock *sk;
-
- for (i = 0; i < RDS_MPATH_WORKERS; i++) {
- cp = &conn->c_path[i];
- tc = cp->cp_transport_data;
- if (!tc->t_sock)
- continue;
- sk = tc->t_sock->sk;
- sk->sk_prot->disconnect(sk, 0);
- tcp_done(sk);
- }
-}
-
static void rds_tcp_kill_sock(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
@@ -527,18 +515,20 @@ static void rds_tcp_kill_sock(struct net *net)
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = tc->t_cpath->cp_conn->c_net;
+ struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
if (net != c_net || !tc->t_sock)
continue;
- if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+ if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) {
list_move_tail(&tc->t_tcp_node, &tmp_list);
+ } else {
+ list_del(&tc->t_tcp_node);
+ tc->t_tcp_node_detached = true;
+ }
}
spin_unlock_irq(&rds_tcp_conn_lock);
- list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
- rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn);
+ list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
rds_conn_destroy(tc->t_cpath->cp_conn);
- }
}
void *rds_tcp_listen_sock_def_readable(struct net *net)
@@ -586,7 +576,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = tc->t_cpath->cp_conn->c_net;
+ struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
if (net != c_net || !tc->t_sock)
continue;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 1aafbf7c3011..c6fa080e9b6d 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -12,6 +12,7 @@ struct rds_tcp_incoming {
struct rds_tcp_connection {
struct list_head t_tcp_node;
+ bool t_tcp_node_detached;
struct rds_conn_path *t_cpath;
/* t_conn_path_lock synchronizes the connection establishment between
* rds_tcp_accept_one and rds_tcp_conn_path_connect
@@ -54,7 +55,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock,
struct rds_tcp_connection *tc);
-u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc);
+u32 rds_tcp_write_seq(struct rds_tcp_connection *tc);
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
extern struct rds_transport rds_tcp_transport;
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 46f74dad0e16..534c67aeb20f 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -170,7 +170,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
cp->cp_conn, tc, sock);
if (sock) {
- if (cp->cp_conn->c_destroy_in_prog)
+ if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
rds_tcp_set_linger(sock);
sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
lock_sock(sock->sk);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index e006ef8e6d40..dd707b9e73e5 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -321,8 +321,12 @@ void rds_tcp_data_ready(struct sock *sk)
ready = tc->t_orig_data_ready;
rds_tcp_stats_inc(s_tcp_data_ready_calls);
- if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM)
- queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+ if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) {
+ rcu_read_lock();
+ if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+ rcu_read_unlock();
+ }
out:
read_unlock_bh(&sk->sk_callback_lock);
ready(sk);
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index dc860d1bb608..16f65744d984 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -86,7 +86,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
* m_ack_seq is set to the sequence number of the last byte of
* header and data. see rds_tcp_is_acked().
*/
- tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc);
+ tc->t_last_sent_nxt = rds_tcp_write_seq(tc);
rm->m_ack_seq = tc->t_last_sent_nxt +
sizeof(struct rds_header) +
be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1;
@@ -98,7 +98,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
rdsdebug("rm %p tcp nxt %u ack_seq %llu\n",
- rm, rds_tcp_snd_nxt(tc),
+ rm, rds_tcp_write_seq(tc),
(unsigned long long)rm->m_ack_seq);
}
@@ -202,8 +202,11 @@ void rds_tcp_write_space(struct sock *sk)
tc->t_last_seen_una = rds_tcp_snd_una(tc);
rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
- if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+ rcu_read_lock();
+ if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf &&
+ !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+ rcu_read_unlock();
out:
read_unlock_bh(&sk->sk_callback_lock);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index f121daa402c8..eb76db1360b0 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -87,8 +87,12 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
cp->cp_reconnect_jiffies = 0;
set_bit(0, &cp->cp_conn->c_map_queued);
- queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
- queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+ rcu_read_lock();
+ if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+ queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+ }
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rds_connect_path_complete);
@@ -133,7 +137,10 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
if (cp->cp_reconnect_jiffies == 0) {
cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
- queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+ rcu_read_lock();
+ if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+ rcu_read_unlock();
return;
}
@@ -141,8 +148,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
conn, &conn->c_laddr, &conn->c_faddr);
- queue_delayed_work(rds_wq, &cp->cp_conn_w,
- rand % cp->cp_reconnect_jiffies);
+ rcu_read_lock();
+ if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ queue_delayed_work(rds_wq, &cp->cp_conn_w,
+ rand % cp->cp_reconnect_jiffies);
+ rcu_read_unlock();
cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
rds_sysctl_reconnect_max_jiffies);
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 2064c3a35ef8..124c77e9d058 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1139,10 +1139,10 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
return -ENOMEM;
}
-static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
+static __poll_t rfkill_fop_poll(struct file *file, poll_table *wait)
{
struct rfkill_data *data = file->private_data;
- unsigned int res = POLLOUT | POLLWRNORM;
+ __poll_t res = POLLOUT | POLLWRNORM;
poll_wait(file, &data->read_wait, wait);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 6a5c4992cf61..083bd251406f 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1461,7 +1461,6 @@ static int rose_info_open(struct inode *inode, struct file *file)
}
static const struct file_operations rose_info_fops = {
- .owner = THIS_MODULE,
.open = rose_info_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 8ca3124df83f..178619ddab68 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -1156,7 +1156,6 @@ static int rose_nodes_open(struct inode *inode, struct file *file)
}
const struct file_operations rose_nodes_fops = {
- .owner = THIS_MODULE,
.open = rose_nodes_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1240,7 +1239,6 @@ static int rose_neigh_open(struct inode *inode, struct file *file)
}
const struct file_operations rose_neigh_fops = {
- .owner = THIS_MODULE,
.open = rose_neigh_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1326,7 +1324,6 @@ static int rose_route_open(struct inode *inode, struct file *file)
}
const struct file_operations rose_routes_fops = {
- .owner = THIS_MODULE,
.open = rose_route_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 9b5c46b052fd..21ad6a3a465c 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -285,6 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
bool upgrade)
{
struct rxrpc_conn_parameters cp;
+ struct rxrpc_call_params p;
struct rxrpc_call *call;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
int ret;
@@ -302,6 +303,10 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
if (key && !key->payload.data[0])
key = NULL; /* a no-security key */
+ memset(&p, 0, sizeof(p));
+ p.user_call_ID = user_call_ID;
+ p.tx_total_len = tx_total_len;
+
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
cp.key = key;
@@ -309,8 +314,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
cp.exclusive = false;
cp.upgrade = upgrade;
cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len,
- gfp);
+ call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp);
/* The socket has been unlocked. */
if (!IS_ERR(call)) {
call->notify_rx = notify_rx;
@@ -725,12 +729,12 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
/*
* permit an RxRPC socket to be polled
*/
-static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
+static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct rxrpc_sock *rx = rxrpc_sk(sk);
- unsigned int mask;
+ __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
@@ -856,6 +860,7 @@ static void rxrpc_sock_destructor(struct sock *sk)
static int rxrpc_release_sock(struct sock *sk)
{
struct rxrpc_sock *rx = rxrpc_sk(sk);
+ struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
_enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
@@ -863,6 +868,19 @@ static int rxrpc_release_sock(struct sock *sk)
sock_orphan(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
+ /* We want to kill off all connections from a service socket
+ * as fast as possible because we can't share these; client
+ * sockets, on the other hand, can share an endpoint.
+ */
+ switch (sk->sk_state) {
+ case RXRPC_SERVER_BOUND:
+ case RXRPC_SERVER_BOUND2:
+ case RXRPC_SERVER_LISTENING:
+ case RXRPC_SERVER_LISTEN_DISABLED:
+ rx->local->service_closed = true;
+ break;
+ }
+
spin_lock_bh(&sk->sk_receive_queue.lock);
sk->sk_state = RXRPC_CLOSE;
spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -878,6 +896,8 @@ static int rxrpc_release_sock(struct sock *sk)
rxrpc_release_calls_on_socket(rx);
flush_workqueue(rxrpc_workqueue);
rxrpc_purge_queue(&sk->sk_receive_queue);
+ rxrpc_queue_work(&rxnet->service_conn_reaper);
+ rxrpc_queue_work(&rxnet->client_conn_reaper);
rxrpc_put_local(rx->local);
rx->local = NULL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index b2151993d384..416688381eb7 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -79,17 +79,20 @@ struct rxrpc_net {
struct list_head conn_proc_list; /* List of conns in this namespace for proc */
struct list_head service_conns; /* Service conns in this namespace */
rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */
- struct delayed_work service_conn_reaper;
+ struct work_struct service_conn_reaper;
+ struct timer_list service_conn_reap_timer;
unsigned int nr_client_conns;
unsigned int nr_active_client_conns;
bool kill_all_client_conns;
+ bool live;
spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */
spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */
struct list_head waiting_client_conns;
struct list_head active_client_conns;
struct list_head idle_client_conns;
- struct delayed_work client_conn_reaper;
+ struct work_struct client_conn_reaper;
+ struct timer_list client_conn_reap_timer;
struct list_head local_endpoints;
struct mutex local_mutex; /* Lock for ->local_endpoints */
@@ -265,6 +268,7 @@ struct rxrpc_local {
rwlock_t services_lock; /* lock for services list */
int debug_id; /* debug ID for printks */
bool dead;
+ bool service_closed; /* Service socket closed */
struct sockaddr_rxrpc srx; /* local address */
};
@@ -338,8 +342,17 @@ enum rxrpc_conn_flag {
RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */
RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */
RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */
+ RXRPC_CONN_FINAL_ACK_0, /* Need final ACK for channel 0 */
+ RXRPC_CONN_FINAL_ACK_1, /* Need final ACK for channel 1 */
+ RXRPC_CONN_FINAL_ACK_2, /* Need final ACK for channel 2 */
+ RXRPC_CONN_FINAL_ACK_3, /* Need final ACK for channel 3 */
};
+#define RXRPC_CONN_FINAL_ACK_MASK ((1UL << RXRPC_CONN_FINAL_ACK_0) | \
+ (1UL << RXRPC_CONN_FINAL_ACK_1) | \
+ (1UL << RXRPC_CONN_FINAL_ACK_2) | \
+ (1UL << RXRPC_CONN_FINAL_ACK_3))
+
/*
* Events that can be raised upon a connection.
*/
@@ -393,6 +406,7 @@ struct rxrpc_connection {
#define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1)
struct list_head waiting_calls; /* Calls waiting for channels */
struct rxrpc_channel {
+ unsigned long final_ack_at; /* Time at which to issue final ACK */
struct rxrpc_call __rcu *call; /* Active call */
u32 call_id; /* ID of current call */
u32 call_counter; /* Call ID counter */
@@ -404,6 +418,7 @@ struct rxrpc_connection {
};
} channels[RXRPC_MAXCALLS];
+ struct timer_list timer; /* Conn event timer */
struct work_struct processor; /* connection event processor */
union {
struct rb_node client_node; /* Node in local->client_conns */
@@ -457,9 +472,10 @@ enum rxrpc_call_flag {
enum rxrpc_call_event {
RXRPC_CALL_EV_ACK, /* need to generate ACK */
RXRPC_CALL_EV_ABORT, /* need to generate abort */
- RXRPC_CALL_EV_TIMER, /* Timer expired */
RXRPC_CALL_EV_RESEND, /* Tx resend required */
RXRPC_CALL_EV_PING, /* Ping send required */
+ RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */
+ RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */
};
/*
@@ -503,10 +519,16 @@ struct rxrpc_call {
struct rxrpc_peer *peer; /* Peer record for remote address */
struct rxrpc_sock __rcu *socket; /* socket responsible */
struct mutex user_mutex; /* User access mutex */
- ktime_t ack_at; /* When deferred ACK needs to happen */
- ktime_t resend_at; /* When next resend needs to happen */
- ktime_t ping_at; /* When next to send a ping */
- ktime_t expire_at; /* When the call times out */
+ unsigned long ack_at; /* When deferred ACK needs to happen */
+ unsigned long ack_lost_at; /* When ACK is figured as lost */
+ unsigned long resend_at; /* When next resend needs to happen */
+ unsigned long ping_at; /* When next to send a ping */
+ unsigned long keepalive_at; /* When next to send a keepalive ping */
+ unsigned long expect_rx_by; /* When we expect to get a packet by */
+ unsigned long expect_req_by; /* When we expect to get a request DATA packet by */
+ unsigned long expect_term_by; /* When we expect call termination by */
+ u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
+ u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
struct timer_list timer; /* Combined event timer */
struct work_struct processor; /* Event processor */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
@@ -609,6 +631,8 @@ struct rxrpc_call {
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
rxrpc_serial_t acks_latest; /* serial number of latest ACK received */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
+ rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
+ rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
};
/*
@@ -632,6 +656,35 @@ struct rxrpc_ack_summary {
u8 cumulative_acks;
};
+/*
+ * sendmsg() cmsg-specified parameters.
+ */
+enum rxrpc_command {
+ RXRPC_CMD_SEND_DATA, /* send data message */
+ RXRPC_CMD_SEND_ABORT, /* request abort generation */
+ RXRPC_CMD_ACCEPT, /* [server] accept incoming call */
+ RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
+};
+
+struct rxrpc_call_params {
+ s64 tx_total_len; /* Total Tx data length (if send data) */
+ unsigned long user_call_ID; /* User's call ID */
+ struct {
+ u32 hard; /* Maximum lifetime (sec) */
+ u32 idle; /* Max time since last data packet (msec) */
+ u32 normal; /* Max time since last call packet (msec) */
+ } timeouts;
+ u8 nr_timeouts; /* Number of timeouts specified */
+};
+
+struct rxrpc_send_params {
+ struct rxrpc_call_params call;
+ u32 abort_code; /* Abort code to Tx (if abort) */
+ enum rxrpc_command command : 8; /* The command to implement */
+ bool exclusive; /* Shared or exclusive call */
+ bool upgrade; /* If the connection is upgradeable */
+};
+
#include <trace/events/rxrpc.h>
/*
@@ -657,12 +710,19 @@ int rxrpc_reject_call(struct rxrpc_sock *);
/*
* call_event.c
*/
-void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
-void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
enum rxrpc_propose_ack_trace);
void rxrpc_process_call(struct work_struct *);
+static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+ unsigned long expire_at,
+ unsigned long now,
+ enum rxrpc_timer_trace why)
+{
+ trace_rxrpc_timer(call, why, now);
+ timer_reduce(&call->timer, expire_at);
+}
+
/*
* call_object.c
*/
@@ -672,11 +732,11 @@ extern unsigned int rxrpc_max_call_lifetime;
extern struct kmem_cache *rxrpc_call_jar;
struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
-struct rxrpc_call *rxrpc_alloc_call(gfp_t);
+struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
struct sockaddr_rxrpc *,
- unsigned long, s64, gfp_t);
+ struct rxrpc_call_params *, gfp_t);
int rxrpc_retry_client_call(struct rxrpc_sock *,
struct rxrpc_call *,
struct rxrpc_conn_parameters *,
@@ -803,8 +863,8 @@ static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
*/
extern unsigned int rxrpc_max_client_connections;
extern unsigned int rxrpc_reap_client_connections;
-extern unsigned int rxrpc_conn_idle_client_expiry;
-extern unsigned int rxrpc_conn_idle_client_fast_expiry;
+extern unsigned long rxrpc_conn_idle_client_expiry;
+extern unsigned long rxrpc_conn_idle_client_fast_expiry;
extern struct idr rxrpc_client_conn_ids;
void rxrpc_destroy_client_conn_ids(void);
@@ -825,6 +885,7 @@ void rxrpc_process_connection(struct work_struct *);
* conn_object.c
*/
extern unsigned int rxrpc_connection_expiry;
+extern unsigned int rxrpc_closed_conn_expiry;
struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
@@ -861,6 +922,12 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
rxrpc_put_service_conn(conn);
}
+static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn,
+ unsigned long expire_at)
+{
+ timer_reduce(&conn->timer, expire_at);
+}
+
/*
* conn_service.c
*/
@@ -930,13 +997,13 @@ static inline void rxrpc_queue_local(struct rxrpc_local *local)
* misc.c
*/
extern unsigned int rxrpc_max_backlog __read_mostly;
-extern unsigned int rxrpc_requested_ack_delay;
-extern unsigned int rxrpc_soft_ack_delay;
-extern unsigned int rxrpc_idle_ack_delay;
+extern unsigned long rxrpc_requested_ack_delay;
+extern unsigned long rxrpc_soft_ack_delay;
+extern unsigned long rxrpc_idle_ack_delay;
extern unsigned int rxrpc_rx_window_size;
extern unsigned int rxrpc_rx_mtu;
extern unsigned int rxrpc_rx_jumbo_max;
-extern unsigned int rxrpc_resend_timeout;
+extern unsigned long rxrpc_resend_timeout;
extern const s8 rxrpc_ack_priority[];
@@ -954,7 +1021,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
/*
* output.c
*/
-int rxrpc_send_ack_packet(struct rxrpc_call *, bool);
+int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *);
int rxrpc_send_abort_packet(struct rxrpc_call *);
int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool);
void rxrpc_reject_packets(struct rxrpc_local *);
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index cbd1701e813a..3028298ca561 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -94,7 +94,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
/* Now it gets complicated, because calls get registered with the
* socket here, particularly if a user ID is preassigned by the user.
*/
- call = rxrpc_alloc_call(gfp);
+ call = rxrpc_alloc_call(rx, gfp);
if (!call)
return -ENOMEM;
call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 3574508baf9a..ad2ab1103189 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -22,80 +22,6 @@
#include "ar-internal.h"
/*
- * Set the timer
- */
-void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
- ktime_t now)
-{
- unsigned long t_j, now_j = jiffies;
- ktime_t t;
- bool queue = false;
-
- if (call->state < RXRPC_CALL_COMPLETE) {
- t = call->expire_at;
- if (!ktime_after(t, now)) {
- trace_rxrpc_timer(call, why, now, now_j);
- queue = true;
- goto out;
- }
-
- if (!ktime_after(call->resend_at, now)) {
- call->resend_at = call->expire_at;
- if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
- queue = true;
- } else if (ktime_before(call->resend_at, t)) {
- t = call->resend_at;
- }
-
- if (!ktime_after(call->ack_at, now)) {
- call->ack_at = call->expire_at;
- if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
- queue = true;
- } else if (ktime_before(call->ack_at, t)) {
- t = call->ack_at;
- }
-
- if (!ktime_after(call->ping_at, now)) {
- call->ping_at = call->expire_at;
- if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
- queue = true;
- } else if (ktime_before(call->ping_at, t)) {
- t = call->ping_at;
- }
-
- t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now)));
- t_j += jiffies;
-
- /* We have to make sure that the calculated jiffies value falls
- * at or after the nsec value, or we may loop ceaselessly
- * because the timer times out, but we haven't reached the nsec
- * timeout yet.
- */
- t_j++;
-
- if (call->timer.expires != t_j || !timer_pending(&call->timer)) {
- mod_timer(&call->timer, t_j);
- trace_rxrpc_timer(call, why, now, now_j);
- }
- }
-
-out:
- if (queue)
- rxrpc_queue_call(call);
-}
-
-/*
- * Set the timer
- */
-void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
- ktime_t now)
-{
- read_lock_bh(&call->state_lock);
- __rxrpc_set_timer(call, why, now);
- read_unlock_bh(&call->state_lock);
-}
-
-/*
* Propose a PING ACK be sent.
*/
static void rxrpc_propose_ping(struct rxrpc_call *call,
@@ -106,12 +32,13 @@ static void rxrpc_propose_ping(struct rxrpc_call *call,
!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
rxrpc_queue_call(call);
} else {
- ktime_t now = ktime_get_real();
- ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay);
+ unsigned long now = jiffies;
+ unsigned long ping_at = now + rxrpc_idle_ack_delay;
- if (ktime_before(ping_at, call->ping_at)) {
- call->ping_at = ping_at;
- rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now);
+ if (time_before(ping_at, call->ping_at)) {
+ WRITE_ONCE(call->ping_at, ping_at);
+ rxrpc_reduce_call_timer(call, ping_at, now,
+ rxrpc_timer_set_for_ping);
}
}
}
@@ -125,8 +52,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
enum rxrpc_propose_ack_trace why)
{
enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use;
- unsigned int expiry = rxrpc_soft_ack_delay;
- ktime_t now, ack_at;
+ unsigned long expiry = rxrpc_soft_ack_delay;
s8 prior = rxrpc_ack_priority[ack_reason];
/* Pings are handled specially because we don't want to accidentally
@@ -190,11 +116,18 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
background)
rxrpc_queue_call(call);
} else {
- now = ktime_get_real();
- ack_at = ktime_add_ms(now, expiry);
- if (ktime_before(ack_at, call->ack_at)) {
- call->ack_at = ack_at;
- rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now);
+ unsigned long now = jiffies, ack_at;
+
+ if (call->peer->rtt_usage > 0)
+ ack_at = nsecs_to_jiffies(call->peer->rtt);
+ else
+ ack_at = expiry;
+
+ ack_at += now;
+ if (time_before(ack_at, call->ack_at)) {
+ WRITE_ONCE(call->ack_at, ack_at);
+ rxrpc_reduce_call_timer(call, ack_at, now,
+ rxrpc_timer_set_for_ack);
}
}
@@ -227,18 +160,28 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
/*
* Perform retransmission of NAK'd and unack'd packets.
*/
-static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
+static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
+ unsigned long resend_at;
rxrpc_seq_t cursor, seq, top;
- ktime_t max_age, oldest, ack_ts;
+ ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo;
int ix;
u8 annotation, anno_type, retrans = 0, unacked = 0;
_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
- max_age = ktime_sub_ms(now, rxrpc_resend_timeout);
+ if (call->peer->rtt_usage > 1)
+ timeout = ns_to_ktime(call->peer->rtt * 3 / 2);
+ else
+ timeout = ms_to_ktime(rxrpc_resend_timeout);
+ min_timeo = ns_to_ktime((1000000000 / HZ) * 4);
+ if (ktime_before(timeout, min_timeo))
+ timeout = min_timeo;
+
+ now = ktime_get_real();
+ max_age = ktime_sub(now, timeout);
spin_lock_bh(&call->lock);
@@ -282,7 +225,9 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
}
- call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout);
+ resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(oldest, now)));
+ resend_at += jiffies + rxrpc_resend_timeout;
+ WRITE_ONCE(call->resend_at, resend_at);
if (unacked)
rxrpc_congestion_timeout(call);
@@ -292,14 +237,15 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
* retransmitting data.
*/
if (!retrans) {
- rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now);
+ rxrpc_reduce_call_timer(call, resend_at, now,
+ rxrpc_timer_set_for_resend);
spin_unlock_bh(&call->lock);
ack_ts = ktime_sub(now, call->acks_latest_ts);
if (ktime_to_ns(ack_ts) < call->peer->rtt)
goto out;
rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
rxrpc_propose_ack_ping_for_lost_ack);
- rxrpc_send_ack_packet(call, true);
+ rxrpc_send_ack_packet(call, true, NULL);
goto out;
}
@@ -364,7 +310,8 @@ void rxrpc_process_call(struct work_struct *work)
{
struct rxrpc_call *call =
container_of(work, struct rxrpc_call, processor);
- ktime_t now;
+ rxrpc_serial_t *send_ack;
+ unsigned long now, next, t;
rxrpc_see_call(call);
@@ -384,22 +331,89 @@ recheck_state:
goto out_put;
}
- now = ktime_get_real();
- if (ktime_before(call->expire_at, now)) {
+ /* Work out if any timeouts tripped */
+ now = jiffies;
+ t = READ_ONCE(call->expect_rx_by);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now);
+ set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+ }
+
+ t = READ_ONCE(call->expect_req_by);
+ if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST &&
+ time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
+ set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+ }
+
+ t = READ_ONCE(call->expect_term_by);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now);
+ set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+ }
+
+ t = READ_ONCE(call->ack_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
+ cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET);
+ set_bit(RXRPC_CALL_EV_ACK, &call->events);
+ }
+
+ t = READ_ONCE(call->ack_lost_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now);
+ cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET);
+ set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events);
+ }
+
+ t = READ_ONCE(call->keepalive_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now);
+ cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET);
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true,
+ rxrpc_propose_ack_ping_for_keepalive);
+ set_bit(RXRPC_CALL_EV_PING, &call->events);
+ }
+
+ t = READ_ONCE(call->ping_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now);
+ cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET);
+ set_bit(RXRPC_CALL_EV_PING, &call->events);
+ }
+
+ t = READ_ONCE(call->resend_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now);
+ cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET);
+ set_bit(RXRPC_CALL_EV_RESEND, &call->events);
+ }
+
+ /* Process events */
+ if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) {
rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
}
- if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) {
+ send_ack = NULL;
+ if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) {
+ call->acks_lost_top = call->tx_top;
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+ rxrpc_propose_ack_ping_for_lost_ack);
+ send_ack = &call->acks_lost_ping;
+ }
+
+ if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) ||
+ send_ack) {
if (call->ackr_reason) {
- rxrpc_send_ack_packet(call, false);
+ rxrpc_send_ack_packet(call, false, send_ack);
goto recheck_state;
}
}
if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) {
- rxrpc_send_ack_packet(call, true);
+ rxrpc_send_ack_packet(call, true, NULL);
goto recheck_state;
}
@@ -408,7 +422,24 @@ recheck_state:
goto recheck_state;
}
- rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now);
+ /* Make sure the timer is restarted */
+ next = call->expect_rx_by;
+
+#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
+
+ set(call->expect_req_by);
+ set(call->expect_term_by);
+ set(call->ack_at);
+ set(call->ack_lost_at);
+ set(call->resend_at);
+ set(call->keepalive_at);
+ set(call->ping_at);
+
+ now = jiffies;
+ if (time_after_eq(now, next))
+ goto recheck_state;
+
+ rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
/* other events may have been raised since we started checking */
if (call->events && call->state < RXRPC_CALL_COMPLETE) {
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 994dc2df57e4..0b2db38dd32d 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -51,10 +51,14 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
_enter("%d", call->debug_id);
- if (call->state < RXRPC_CALL_COMPLETE)
- rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real());
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies);
+ rxrpc_queue_call(call);
+ }
}
+static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
+
/*
* find an extant server call
* - called in process context with IRQs enabled
@@ -95,7 +99,7 @@ found_extant_call:
/*
* allocate a new call
*/
-struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
+struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp)
{
struct rxrpc_call *call;
@@ -114,6 +118,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
goto nomem_2;
mutex_init(&call->user_mutex);
+
+ /* Prevent lockdep reporting a deadlock false positive between the afs
+ * filesystem and sys_sendmsg() via the mmap sem.
+ */
+ if (rx->sk.sk_kern_sock)
+ lockdep_set_class(&call->user_mutex,
+ &rxrpc_call_user_mutex_lock_class_key);
+
timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
INIT_WORK(&call->processor, &rxrpc_process_call);
INIT_LIST_HEAD(&call->link);
@@ -128,6 +140,8 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
atomic_set(&call->usage, 1);
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
call->tx_total_len = -1;
+ call->next_rx_timo = 20 * HZ;
+ call->next_req_timo = 1 * HZ;
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
@@ -150,7 +164,8 @@ nomem:
/*
* Allocate a new client call.
*/
-static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
+static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
+ struct sockaddr_rxrpc *srx,
gfp_t gfp)
{
struct rxrpc_call *call;
@@ -158,7 +173,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
_enter("");
- call = rxrpc_alloc_call(gfp);
+ call = rxrpc_alloc_call(rx, gfp);
if (!call)
return ERR_PTR(-ENOMEM);
call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
@@ -177,15 +192,17 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
*/
static void rxrpc_start_call_timer(struct rxrpc_call *call)
{
- ktime_t now = ktime_get_real(), expire_at;
-
- expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime);
- call->expire_at = expire_at;
- call->ack_at = expire_at;
- call->ping_at = expire_at;
- call->resend_at = expire_at;
- call->timer.expires = jiffies + LONG_MAX / 2;
- rxrpc_set_timer(call, rxrpc_timer_begin, now);
+ unsigned long now = jiffies;
+ unsigned long j = now + MAX_JIFFY_OFFSET;
+
+ call->ack_at = j;
+ call->ack_lost_at = j;
+ call->resend_at = j;
+ call->ping_at = j;
+ call->expect_rx_by = j;
+ call->expect_req_by = j;
+ call->expect_term_by = j;
+ call->timer.expires = now;
}
/*
@@ -196,8 +213,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call)
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
struct rxrpc_conn_parameters *cp,
struct sockaddr_rxrpc *srx,
- unsigned long user_call_ID,
- s64 tx_total_len,
+ struct rxrpc_call_params *p,
gfp_t gfp)
__releases(&rx->sk.sk_lock.slock)
{
@@ -207,18 +223,18 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
const void *here = __builtin_return_address(0);
int ret;
- _enter("%p,%lx", rx, user_call_ID);
+ _enter("%p,%lx", rx, p->user_call_ID);
- call = rxrpc_alloc_client_call(srx, gfp);
+ call = rxrpc_alloc_client_call(rx, srx, gfp);
if (IS_ERR(call)) {
release_sock(&rx->sk);
_leave(" = %ld", PTR_ERR(call));
return call;
}
- call->tx_total_len = tx_total_len;
+ call->tx_total_len = p->tx_total_len;
trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
- here, (const void *)user_call_ID);
+ here, (const void *)p->user_call_ID);
/* We need to protect a partially set up call against the user as we
* will be acting outside the socket lock.
@@ -234,16 +250,16 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
parent = *pp;
xcall = rb_entry(parent, struct rxrpc_call, sock_node);
- if (user_call_ID < xcall->user_call_ID)
+ if (p->user_call_ID < xcall->user_call_ID)
pp = &(*pp)->rb_left;
- else if (user_call_ID > xcall->user_call_ID)
+ else if (p->user_call_ID > xcall->user_call_ID)
pp = &(*pp)->rb_right;
else
goto error_dup_user_ID;
}
rcu_assign_pointer(call->socket, rx);
- call->user_call_ID = user_call_ID;
+ call->user_call_ID = p->user_call_ID;
__set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
rxrpc_get_call(call, rxrpc_call_got_userid);
rb_link_node(&call->sock_node, parent, pp);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 5f9624bd311c..7f74ca3059f8 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -85,8 +85,8 @@
__read_mostly unsigned int rxrpc_max_client_connections = 1000;
__read_mostly unsigned int rxrpc_reap_client_connections = 900;
-__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
-__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
+__read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
+__read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
/*
* We use machine-unique IDs for our client connections.
@@ -554,6 +554,11 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
+ /* Cancel the final ACK on the previous call if it hasn't been sent yet
+ * as the DATA packet will implicitly ACK it.
+ */
+ clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
+
write_lock_bh(&call->state_lock);
if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags))
call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
@@ -686,7 +691,7 @@ int rxrpc_connect_call(struct rxrpc_call *call,
_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
- rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work);
+ rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper);
rxrpc_cull_active_client_conns(rxnet);
ret = rxrpc_get_client_conn(call, cp, srx, gfp);
@@ -752,6 +757,18 @@ void rxrpc_expose_client_call(struct rxrpc_call *call)
}
/*
+ * Set the reap timer.
+ */
+static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet)
+{
+ unsigned long now = jiffies;
+ unsigned long reap_at = now + rxrpc_conn_idle_client_expiry;
+
+ if (rxnet->live)
+ timer_reduce(&rxnet->client_conn_reap_timer, reap_at);
+}
+
+/*
* Disconnect a client call.
*/
void rxrpc_disconnect_client_call(struct rxrpc_call *call)
@@ -813,6 +830,19 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
goto out_2;
}
+ /* Schedule the final ACK to be transmitted in a short while so that it
+ * can be skipped if we find a follow-on call. The first DATA packet
+ * of the follow on call will implicitly ACK this call.
+ */
+ if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ unsigned long final_ack_at = jiffies + 2;
+
+ WRITE_ONCE(chan->final_ack_at, final_ack_at);
+ smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */
+ set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
+ rxrpc_reduce_conn_timer(conn, final_ack_at);
+ }
+
/* Things are more complex and we need the cache lock. We might be
* able to simply idle the conn or it might now be lurking on the wait
* list. It might even get moved back to the active list whilst we're
@@ -878,9 +908,7 @@ idle_connection:
list_move_tail(&conn->cache_link, &rxnet->idle_client_conns);
if (rxnet->idle_client_conns.next == &conn->cache_link &&
!rxnet->kill_all_client_conns)
- queue_delayed_work(rxrpc_workqueue,
- &rxnet->client_conn_reaper,
- rxrpc_conn_idle_client_expiry);
+ rxrpc_set_client_reap_timer(rxnet);
} else {
trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive);
conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
@@ -1018,8 +1046,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work)
{
struct rxrpc_connection *conn;
struct rxrpc_net *rxnet =
- container_of(to_delayed_work(work),
- struct rxrpc_net, client_conn_reaper);
+ container_of(work, struct rxrpc_net, client_conn_reaper);
unsigned long expiry, conn_expires_at, now;
unsigned int nr_conns;
bool did_discard = false;
@@ -1061,6 +1088,8 @@ next:
expiry = rxrpc_conn_idle_client_expiry;
if (nr_conns > rxrpc_reap_client_connections)
expiry = rxrpc_conn_idle_client_fast_expiry;
+ if (conn->params.local->service_closed)
+ expiry = rxrpc_closed_conn_expiry * HZ;
conn_expires_at = conn->idle_timestamp + expiry;
@@ -1096,9 +1125,8 @@ not_yet_expired:
*/
_debug("not yet");
if (!rxnet->kill_all_client_conns)
- queue_delayed_work(rxrpc_workqueue,
- &rxnet->client_conn_reaper,
- conn_expires_at - now);
+ timer_reduce(&rxnet->client_conn_reap_timer,
+ conn_expires_at);
out:
spin_unlock(&rxnet->client_conn_cache_lock);
@@ -1118,9 +1146,9 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
rxnet->kill_all_client_conns = true;
spin_unlock(&rxnet->client_conn_cache_lock);
- cancel_delayed_work(&rxnet->client_conn_reaper);
+ del_timer_sync(&rxnet->client_conn_reap_timer);
- if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0))
+ if (!rxrpc_queue_work(&rxnet->client_conn_reaper))
_debug("destroy: queue failed");
_leave("");
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 59a51a56e7c8..4ca11be6be3c 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -24,31 +24,28 @@
* Retransmit terminal ACK or ABORT of the previous call.
*/
static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ unsigned int channel)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL;
struct rxrpc_channel *chan;
struct msghdr msg;
- struct kvec iov;
+ struct kvec iov[3];
struct {
struct rxrpc_wire_header whdr;
union {
- struct {
- __be32 code;
- } abort;
- struct {
- struct rxrpc_ackpacket ack;
- u8 padding[3];
- struct rxrpc_ackinfo info;
- };
+ __be32 abort_code;
+ struct rxrpc_ackpacket ack;
};
} __attribute__((packed)) pkt;
+ struct rxrpc_ackinfo ack_info;
size_t len;
- u32 serial, mtu, call_id;
+ int ioc;
+ u32 serial, mtu, call_id, padding;
_enter("%d", conn->debug_id);
- chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK];
+ chan = &conn->channels[channel];
/* If the last call got moved on whilst we were waiting to run, just
* ignore this packet.
@@ -56,7 +53,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
call_id = READ_ONCE(chan->last_call);
/* Sync with __rxrpc_disconnect_call() */
smp_rmb();
- if (call_id != sp->hdr.callNumber)
+ if (skb && call_id != sp->hdr.callNumber)
return;
msg.msg_name = &conn->params.peer->srx.transport;
@@ -65,9 +62,16 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
msg.msg_controllen = 0;
msg.msg_flags = 0;
- pkt.whdr.epoch = htonl(sp->hdr.epoch);
- pkt.whdr.cid = htonl(sp->hdr.cid);
- pkt.whdr.callNumber = htonl(sp->hdr.callNumber);
+ iov[0].iov_base = &pkt;
+ iov[0].iov_len = sizeof(pkt.whdr);
+ iov[1].iov_base = &padding;
+ iov[1].iov_len = 3;
+ iov[2].iov_base = &ack_info;
+ iov[2].iov_len = sizeof(ack_info);
+
+ pkt.whdr.epoch = htonl(conn->proto.epoch);
+ pkt.whdr.cid = htonl(conn->proto.cid);
+ pkt.whdr.callNumber = htonl(call_id);
pkt.whdr.seq = 0;
pkt.whdr.type = chan->last_type;
pkt.whdr.flags = conn->out_clientflag;
@@ -79,27 +83,35 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
len = sizeof(pkt.whdr);
switch (chan->last_type) {
case RXRPC_PACKET_TYPE_ABORT:
- pkt.abort.code = htonl(chan->last_abort);
- len += sizeof(pkt.abort);
+ pkt.abort_code = htonl(chan->last_abort);
+ iov[0].iov_len += sizeof(pkt.abort_code);
+ len += sizeof(pkt.abort_code);
+ ioc = 1;
break;
case RXRPC_PACKET_TYPE_ACK:
mtu = conn->params.peer->if_mtu;
mtu -= conn->params.peer->hdrsize;
pkt.ack.bufferSpace = 0;
- pkt.ack.maxSkew = htons(skb->priority);
- pkt.ack.firstPacket = htonl(chan->last_seq);
- pkt.ack.previousPacket = htonl(chan->last_seq - 1);
- pkt.ack.serial = htonl(sp->hdr.serial);
- pkt.ack.reason = RXRPC_ACK_DUPLICATE;
+ pkt.ack.maxSkew = htons(skb ? skb->priority : 0);
+ pkt.ack.firstPacket = htonl(chan->last_seq + 1);
+ pkt.ack.previousPacket = htonl(chan->last_seq);
+ pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0);
+ pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
pkt.ack.nAcks = 0;
- pkt.info.rxMTU = htonl(rxrpc_rx_mtu);
- pkt.info.maxMTU = htonl(mtu);
- pkt.info.rwind = htonl(rxrpc_rx_window_size);
- pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
+ ack_info.rxMTU = htonl(rxrpc_rx_mtu);
+ ack_info.maxMTU = htonl(mtu);
+ ack_info.rwind = htonl(rxrpc_rx_window_size);
+ ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
pkt.whdr.flags |= RXRPC_SLOW_START_OK;
- len += sizeof(pkt.ack) + sizeof(pkt.info);
+ padding = 0;
+ iov[0].iov_len += sizeof(pkt.ack);
+ len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
+ ioc = 3;
break;
+
+ default:
+ return;
}
/* Resync with __rxrpc_disconnect_call() and check that the last call
@@ -109,9 +121,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
if (READ_ONCE(chan->last_call) != call_id)
return;
- iov.iov_base = &pkt;
- iov.iov_len = len;
-
serial = atomic_inc_return(&conn->serial);
pkt.whdr.serial = htonl(serial);
@@ -126,7 +135,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
break;
}
- kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len);
+ kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
_leave("");
return;
}
@@ -272,7 +281,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_DATA:
case RXRPC_PACKET_TYPE_ACK:
- rxrpc_conn_retransmit_call(conn, skb);
+ rxrpc_conn_retransmit_call(conn, skb,
+ sp->hdr.cid & RXRPC_CHANNELMASK);
return 0;
case RXRPC_PACKET_TYPE_BUSY:
@@ -379,6 +389,48 @@ abort:
}
/*
+ * Process delayed final ACKs that we haven't subsumed into a subsequent call.
+ */
+static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn)
+{
+ unsigned long j = jiffies, next_j;
+ unsigned int channel;
+ bool set;
+
+again:
+ next_j = j + LONG_MAX;
+ set = false;
+ for (channel = 0; channel < RXRPC_MAXCALLS; channel++) {
+ struct rxrpc_channel *chan = &conn->channels[channel];
+ unsigned long ack_at;
+
+ if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags))
+ continue;
+
+ smp_rmb(); /* vs rxrpc_disconnect_client_call */
+ ack_at = READ_ONCE(chan->final_ack_at);
+
+ if (time_before(j, ack_at)) {
+ if (time_before(ack_at, next_j)) {
+ next_j = ack_at;
+ set = true;
+ }
+ continue;
+ }
+
+ if (test_and_clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel,
+ &conn->flags))
+ rxrpc_conn_retransmit_call(conn, NULL, channel);
+ }
+
+ j = jiffies;
+ if (time_before_eq(next_j, j))
+ goto again;
+ if (set)
+ rxrpc_reduce_conn_timer(conn, next_j);
+}
+
+/*
* connection-level event processor
*/
void rxrpc_process_connection(struct work_struct *work)
@@ -394,6 +446,10 @@ void rxrpc_process_connection(struct work_struct *work)
if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
rxrpc_secure_connection(conn);
+ /* Process delayed ACKs whose time has come. */
+ if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
+ rxrpc_process_delayed_final_acks(conn);
+
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
while ((skb = skb_dequeue(&conn->rx_queue))) {
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index fe575798592f..c628351eb900 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -20,10 +20,19 @@
/*
* Time till a connection expires after last use (in seconds).
*/
-unsigned int rxrpc_connection_expiry = 10 * 60;
+unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60;
+unsigned int __read_mostly rxrpc_closed_conn_expiry = 10;
static void rxrpc_destroy_connection(struct rcu_head *);
+static void rxrpc_connection_timer(struct timer_list *timer)
+{
+ struct rxrpc_connection *conn =
+ container_of(timer, struct rxrpc_connection, timer);
+
+ rxrpc_queue_conn(conn);
+}
+
/*
* allocate a new connection
*/
@@ -38,6 +47,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
INIT_LIST_HEAD(&conn->cache_link);
spin_lock_init(&conn->channel_lock);
INIT_LIST_HEAD(&conn->waiting_calls);
+ timer_setup(&conn->timer, &rxrpc_connection_timer, 0);
INIT_WORK(&conn->processor, &rxrpc_process_connection);
INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
@@ -301,21 +311,29 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
}
/*
+ * Set the service connection reap timer.
+ */
+static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
+ unsigned long reap_at)
+{
+ if (rxnet->live)
+ timer_reduce(&rxnet->service_conn_reap_timer, reap_at);
+}
+
+/*
* Release a service connection
*/
void rxrpc_put_service_conn(struct rxrpc_connection *conn)
{
- struct rxrpc_net *rxnet;
const void *here = __builtin_return_address(0);
int n;
n = atomic_dec_return(&conn->usage);
trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here);
ASSERTCMP(n, >=, 0);
- if (n == 0) {
- rxnet = conn->params.local->rxnet;
- rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0);
- }
+ if (n == 1)
+ rxrpc_set_service_reap_timer(conn->params.local->rxnet,
+ jiffies + rxrpc_connection_expiry);
}
/*
@@ -332,6 +350,7 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
_net("DESTROY CONN %d", conn->debug_id);
+ del_timer_sync(&conn->timer);
rxrpc_purge_queue(&conn->rx_queue);
conn->security->clear(conn);
@@ -351,17 +370,15 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
{
struct rxrpc_connection *conn, *_p;
struct rxrpc_net *rxnet =
- container_of(to_delayed_work(work),
- struct rxrpc_net, service_conn_reaper);
- unsigned long reap_older_than, earliest, idle_timestamp, now;
+ container_of(work, struct rxrpc_net, service_conn_reaper);
+ unsigned long expire_at, earliest, idle_timestamp, now;
LIST_HEAD(graveyard);
_enter("");
now = jiffies;
- reap_older_than = now - rxrpc_connection_expiry * HZ;
- earliest = ULONG_MAX;
+ earliest = now + MAX_JIFFY_OFFSET;
write_lock(&rxnet->conn_lock);
list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
@@ -371,15 +388,21 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
continue;
- idle_timestamp = READ_ONCE(conn->idle_timestamp);
- _debug("reap CONN %d { u=%d,t=%ld }",
- conn->debug_id, atomic_read(&conn->usage),
- (long)reap_older_than - (long)idle_timestamp);
-
- if (time_after(idle_timestamp, reap_older_than)) {
- if (time_before(idle_timestamp, earliest))
- earliest = idle_timestamp;
- continue;
+ if (rxnet->live) {
+ idle_timestamp = READ_ONCE(conn->idle_timestamp);
+ expire_at = idle_timestamp + rxrpc_connection_expiry * HZ;
+ if (conn->params.local->service_closed)
+ expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ;
+
+ _debug("reap CONN %d { u=%d,t=%ld }",
+ conn->debug_id, atomic_read(&conn->usage),
+ (long)expire_at - (long)now);
+
+ if (time_before(now, expire_at)) {
+ if (time_before(expire_at, earliest))
+ earliest = expire_at;
+ continue;
+ }
}
/* The usage count sits at 1 whilst the object is unused on the
@@ -387,6 +410,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
*/
if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
continue;
+ trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, 0);
if (rxrpc_conn_is_client(conn))
BUG();
@@ -397,11 +421,10 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
}
write_unlock(&rxnet->conn_lock);
- if (earliest != ULONG_MAX) {
- _debug("reschedule reaper %ld", (long) earliest - now);
+ if (earliest != now + MAX_JIFFY_OFFSET) {
+ _debug("reschedule reaper %ld", (long)earliest - (long)now);
ASSERT(time_after(earliest, now));
- rxrpc_queue_delayed_work(&rxnet->client_conn_reaper,
- earliest - now);
+ rxrpc_set_service_reap_timer(rxnet, earliest);
}
while (!list_empty(&graveyard)) {
@@ -429,9 +452,8 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
rxrpc_destroy_all_client_connections(rxnet);
- rxrpc_connection_expiry = 0;
- cancel_delayed_work(&rxnet->client_conn_reaper);
- rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0);
+ del_timer_sync(&rxnet->service_conn_reap_timer);
+ rxrpc_queue_work(&rxnet->service_conn_reaper);
flush_workqueue(rxrpc_workqueue);
write_lock(&rxnet->conn_lock);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 1b592073ec96..6fc61400337f 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -318,16 +318,18 @@ bad_state:
static bool rxrpc_receiving_reply(struct rxrpc_call *call)
{
struct rxrpc_ack_summary summary = { 0 };
+ unsigned long now, timo;
rxrpc_seq_t top = READ_ONCE(call->tx_top);
if (call->ackr_reason) {
spin_lock_bh(&call->lock);
call->ackr_reason = 0;
- call->resend_at = call->expire_at;
- call->ack_at = call->expire_at;
spin_unlock_bh(&call->lock);
- rxrpc_set_timer(call, rxrpc_timer_init_for_reply,
- ktime_get_real());
+ now = jiffies;
+ timo = now + MAX_JIFFY_OFFSET;
+ WRITE_ONCE(call->resend_at, timo);
+ WRITE_ONCE(call->ack_at, timo);
+ trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
}
if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags))
@@ -437,6 +439,19 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
if (state >= RXRPC_CALL_COMPLETE)
return;
+ if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) {
+ unsigned long timo = READ_ONCE(call->next_req_timo);
+ unsigned long now, expect_req_by;
+
+ if (timo) {
+ now = jiffies;
+ expect_req_by = now + timo;
+ WRITE_ONCE(call->expect_req_by, expect_req_by);
+ rxrpc_reduce_call_timer(call, expect_req_by, now,
+ rxrpc_timer_set_for_idle);
+ }
+ }
+
/* Received data implicitly ACKs all of the request packets we sent
* when we're acting as a client.
*/
@@ -616,6 +631,43 @@ found:
}
/*
+ * Process the response to a ping that we sent to find out if we lost an ACK.
+ *
+ * If we got back a ping response that indicates a lower tx_top than what we
+ * had at the time of the ping transmission, we adjudge all the DATA packets
+ * sent between the response tx_top and the ping-time tx_top to have been lost.
+ */
+static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call)
+{
+ rxrpc_seq_t top, bottom, seq;
+ bool resend = false;
+
+ spin_lock_bh(&call->lock);
+
+ bottom = call->tx_hard_ack + 1;
+ top = call->acks_lost_top;
+ if (before(bottom, top)) {
+ for (seq = bottom; before_eq(seq, top); seq++) {
+ int ix = seq & RXRPC_RXTX_BUFF_MASK;
+ u8 annotation = call->rxtx_annotations[ix];
+ u8 anno_type = annotation & RXRPC_TX_ANNO_MASK;
+
+ if (anno_type != RXRPC_TX_ANNO_UNACK)
+ continue;
+ annotation &= ~RXRPC_TX_ANNO_MASK;
+ annotation |= RXRPC_TX_ANNO_RETRANS;
+ call->rxtx_annotations[ix] = annotation;
+ resend = true;
+ }
+ }
+
+ spin_unlock_bh(&call->lock);
+
+ if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+ rxrpc_queue_call(call);
+}
+
+/*
* Process a ping response.
*/
static void rxrpc_input_ping_response(struct rxrpc_call *call,
@@ -630,6 +682,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call,
smp_rmb();
ping_serial = call->ping_serial;
+ if (orig_serial == call->acks_lost_ping)
+ rxrpc_input_check_for_lost_ack(call);
+
if (!test_bit(RXRPC_CALL_PINGING, &call->flags) ||
before(orig_serial, ping_serial))
return;
@@ -908,9 +963,20 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
struct sk_buff *skb, u16 skew)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned long timo;
_enter("%p,%p", call, skb);
+ timo = READ_ONCE(call->next_rx_timo);
+ if (timo) {
+ unsigned long now = jiffies, expect_rx_by;
+
+ expect_rx_by = jiffies + timo;
+ WRITE_ONCE(call->expect_rx_by, expect_rx_by);
+ rxrpc_reduce_call_timer(call, expect_rx_by, now,
+ rxrpc_timer_set_for_normal);
+ }
+
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_DATA:
rxrpc_input_data(call, skb, skew);
@@ -1147,7 +1213,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
goto reupgrade;
conn->service_id = sp->hdr.serviceId;
}
-
+
if (sp->hdr.callNumber == 0) {
/* Connection-level packet */
_debug("CONN %p {%d}", conn, conn->debug_id);
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 1a2d4b112064..c1d9e7fd7448 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -21,33 +21,28 @@
unsigned int rxrpc_max_backlog __read_mostly = 10;
/*
- * Maximum lifetime of a call (in mx).
- */
-unsigned int rxrpc_max_call_lifetime = 60 * 1000;
-
-/*
* How long to wait before scheduling ACK generation after seeing a
- * packet with RXRPC_REQUEST_ACK set (in ms).
+ * packet with RXRPC_REQUEST_ACK set (in jiffies).
*/
-unsigned int rxrpc_requested_ack_delay = 1;
+unsigned long rxrpc_requested_ack_delay = 1;
/*
- * How long to wait before scheduling an ACK with subtype DELAY (in ms).
+ * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
*
* We use this when we've received new data packets. If those packets aren't
* all consumed within this time we will send a DELAY ACK if an ACK was not
* requested to let the sender know it doesn't need to resend.
*/
-unsigned int rxrpc_soft_ack_delay = 1 * 1000;
+unsigned long rxrpc_soft_ack_delay = HZ;
/*
- * How long to wait before scheduling an ACK with subtype IDLE (in ms).
+ * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
*
* We use this when we've consumed some previously soft-ACK'd packets when
* further packets aren't immediately received to decide when to send an IDLE
* ACK let the other end know that it can free up its Tx buffer space.
*/
-unsigned int rxrpc_idle_ack_delay = 0.5 * 1000;
+unsigned long rxrpc_idle_ack_delay = HZ / 2;
/*
* Receive window size in packets. This indicates the maximum number of
@@ -75,7 +70,7 @@ unsigned int rxrpc_rx_jumbo_max = 4;
/*
* Time till packet resend (in milliseconds).
*/
-unsigned int rxrpc_resend_timeout = 4 * 1000;
+unsigned long rxrpc_resend_timeout = 4 * HZ;
const s8 rxrpc_ack_priority[] = {
[0] = 0,
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 7edceb8522f5..f18c9248e0d4 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -14,6 +14,24 @@
unsigned int rxrpc_net_id;
+static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
+{
+ struct rxrpc_net *rxnet =
+ container_of(timer, struct rxrpc_net, client_conn_reap_timer);
+
+ if (rxnet->live)
+ rxrpc_queue_work(&rxnet->client_conn_reaper);
+}
+
+static void rxrpc_service_conn_reap_timeout(struct timer_list *timer)
+{
+ struct rxrpc_net *rxnet =
+ container_of(timer, struct rxrpc_net, service_conn_reap_timer);
+
+ if (rxnet->live)
+ rxrpc_queue_work(&rxnet->service_conn_reaper);
+}
+
/*
* Initialise a per-network namespace record.
*/
@@ -22,6 +40,7 @@ static __net_init int rxrpc_init_net(struct net *net)
struct rxrpc_net *rxnet = rxrpc_net(net);
int ret;
+ rxnet->live = true;
get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch));
rxnet->epoch |= RXRPC_RANDOM_EPOCH;
@@ -31,8 +50,10 @@ static __net_init int rxrpc_init_net(struct net *net)
INIT_LIST_HEAD(&rxnet->conn_proc_list);
INIT_LIST_HEAD(&rxnet->service_conns);
rwlock_init(&rxnet->conn_lock);
- INIT_DELAYED_WORK(&rxnet->service_conn_reaper,
- rxrpc_service_connection_reaper);
+ INIT_WORK(&rxnet->service_conn_reaper,
+ rxrpc_service_connection_reaper);
+ timer_setup(&rxnet->service_conn_reap_timer,
+ rxrpc_service_conn_reap_timeout, 0);
rxnet->nr_client_conns = 0;
rxnet->nr_active_client_conns = 0;
@@ -42,8 +63,10 @@ static __net_init int rxrpc_init_net(struct net *net)
INIT_LIST_HEAD(&rxnet->waiting_client_conns);
INIT_LIST_HEAD(&rxnet->active_client_conns);
INIT_LIST_HEAD(&rxnet->idle_client_conns);
- INIT_DELAYED_WORK(&rxnet->client_conn_reaper,
- rxrpc_discard_expired_client_conns);
+ INIT_WORK(&rxnet->client_conn_reaper,
+ rxrpc_discard_expired_client_conns);
+ timer_setup(&rxnet->client_conn_reap_timer,
+ rxrpc_client_conn_reap_timeout, 0);
INIT_LIST_HEAD(&rxnet->local_endpoints);
mutex_init(&rxnet->local_mutex);
@@ -60,6 +83,7 @@ static __net_init int rxrpc_init_net(struct net *net)
return 0;
err_proc:
+ rxnet->live = false;
return ret;
}
@@ -70,6 +94,7 @@ static __net_exit void rxrpc_exit_net(struct net *net)
{
struct rxrpc_net *rxnet = rxrpc_net(net);
+ rxnet->live = false;
rxrpc_destroy_all_calls(rxnet);
rxrpc_destroy_all_connections(rxnet);
rxrpc_destroy_all_locals(rxnet);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f47659c7b224..42410e910aff 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -33,6 +33,24 @@ struct rxrpc_abort_buffer {
};
/*
+ * Arrange for a keepalive ping a certain time after we last transmitted. This
+ * lets the far side know we're still interested in this call and helps keep
+ * the route through any intervening firewall open.
+ *
+ * Receiving a response to the ping will prevent the ->expect_rx_by timer from
+ * expiring.
+ */
+static void rxrpc_set_keepalive(struct rxrpc_call *call)
+{
+ unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6;
+
+ keepalive_at += now;
+ WRITE_ONCE(call->keepalive_at, keepalive_at);
+ rxrpc_reduce_call_timer(call, keepalive_at, now,
+ rxrpc_timer_set_for_keepalive);
+}
+
+/*
* Fill out an ACK packet.
*/
static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
@@ -95,7 +113,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
/*
* Send an ACK call packet.
*/
-int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
+int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
+ rxrpc_serial_t *_serial)
{
struct rxrpc_connection *conn = NULL;
struct rxrpc_ack_buffer *pkt;
@@ -165,6 +184,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
ntohl(pkt->ack.firstPacket),
ntohl(pkt->ack.serial),
pkt->ack.reason, pkt->ack.nAcks);
+ if (_serial)
+ *_serial = serial;
if (ping) {
call->ping_serial = serial;
@@ -202,6 +223,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
call->ackr_seen = top;
spin_unlock_bh(&call->lock);
}
+
+ rxrpc_set_keepalive(call);
}
out:
@@ -323,7 +346,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
* ACKs if a DATA packet appears to have been lost.
*/
if (!(sp->hdr.flags & RXRPC_LAST_PACKET) &&
- (retrans ||
+ (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) ||
+ retrans ||
call->cong_mode == RXRPC_CALL_SLOW_START ||
(call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
@@ -370,8 +394,23 @@ done:
if (whdr.flags & RXRPC_REQUEST_ACK) {
call->peer->rtt_last_req = now;
trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
+ if (call->peer->rtt_usage > 1) {
+ unsigned long nowj = jiffies, ack_lost_at;
+
+ ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt);
+ if (ack_lost_at < 1)
+ ack_lost_at = 1;
+
+ ack_lost_at += nowj;
+ WRITE_ONCE(call->ack_lost_at, ack_lost_at);
+ rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
+ rxrpc_timer_set_for_lost_ack);
+ }
}
}
+
+ rxrpc_set_keepalive(call);
+
_leave(" = %d [%u]", ret, call->peer->maxdata);
return ret;
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 7421656963a9..f79f260c6ddc 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -125,7 +125,6 @@ static int rxrpc_call_seq_open(struct inode *inode, struct file *file)
}
const struct file_operations rxrpc_call_seq_fops = {
- .owner = THIS_MODULE,
.open = rxrpc_call_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -217,7 +216,6 @@ static int rxrpc_connection_seq_open(struct inode *inode, struct file *file)
}
const struct file_operations rxrpc_connection_seq_fops = {
- .owner = THIS_MODULE,
.open = rxrpc_connection_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 8510a98b87e1..cc21e8db25b0 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -144,11 +144,13 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top);
ASSERTCMP(call->rx_hard_ack, ==, call->rx_top);
+#if 0 // TODO: May want to transmit final ACK under some circumstances anyway
if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false,
rxrpc_propose_ack_terminal_ack);
- rxrpc_send_ack_packet(call, false);
+ rxrpc_send_ack_packet(call, false, NULL);
}
+#endif
write_lock_bh(&call->state_lock);
@@ -161,7 +163,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
case RXRPC_CALL_SERVER_RECV_REQUEST:
call->tx_phase = true;
call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
- call->ack_at = call->expire_at;
+ call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
write_unlock_bh(&call->state_lock);
rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true,
rxrpc_propose_ack_processing_op);
@@ -217,10 +219,10 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
after_eq(top, call->ackr_seen + 2) ||
(hard_ack == top && after(hard_ack, call->ackr_consumed)))
rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial,
- true, false,
+ true, true,
rxrpc_propose_ack_rotate_rx);
- if (call->ackr_reason)
- rxrpc_send_ack_packet(call, false);
+ if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY)
+ rxrpc_send_ack_packet(call, false, NULL);
}
}
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 7d2595582c09..09f2a3e05221 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -21,22 +21,6 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-enum rxrpc_command {
- RXRPC_CMD_SEND_DATA, /* send data message */
- RXRPC_CMD_SEND_ABORT, /* request abort generation */
- RXRPC_CMD_ACCEPT, /* [server] accept incoming call */
- RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
-};
-
-struct rxrpc_send_params {
- s64 tx_total_len; /* Total Tx data length (if send data) */
- unsigned long user_call_ID; /* User's call ID */
- u32 abort_code; /* Abort code to Tx (if abort) */
- enum rxrpc_command command : 8; /* The command to implement */
- bool exclusive; /* Shared or exclusive call */
- bool upgrade; /* If the connection is upgradeable */
-};
-
/*
* Wait for space to appear in the Tx queue or a signal to occur.
*/
@@ -174,6 +158,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
rxrpc_notify_end_tx_t notify_end_tx)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned long now;
rxrpc_seq_t seq = sp->hdr.seq;
int ret, ix;
u8 annotation = RXRPC_TX_ANNO_UNACK;
@@ -213,11 +198,11 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
break;
case RXRPC_CALL_SERVER_ACK_REQUEST:
call->state = RXRPC_CALL_SERVER_SEND_REPLY;
- call->ack_at = call->expire_at;
+ now = jiffies;
+ WRITE_ONCE(call->ack_at, now + MAX_JIFFY_OFFSET);
if (call->ackr_reason == RXRPC_ACK_DELAY)
call->ackr_reason = 0;
- __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply,
- ktime_get_real());
+ trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
if (!last)
break;
/* Fall through */
@@ -239,14 +224,19 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
_debug("need instant resend %d", ret);
rxrpc_instant_resend(call, ix);
} else {
- ktime_t now = ktime_get_real(), resend_at;
-
- resend_at = ktime_add_ms(now, rxrpc_resend_timeout);
-
- if (ktime_before(resend_at, call->resend_at)) {
- call->resend_at = resend_at;
- rxrpc_set_timer(call, rxrpc_timer_set_for_send, now);
- }
+ unsigned long now = jiffies, resend_at;
+
+ if (call->peer->rtt_usage > 1)
+ resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2);
+ else
+ resend_at = rxrpc_resend_timeout;
+ if (resend_at < 1)
+ resend_at = 1;
+
+ resend_at += now;
+ WRITE_ONCE(call->resend_at, resend_at);
+ rxrpc_reduce_call_timer(call, resend_at, now,
+ rxrpc_timer_set_for_send);
}
rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
@@ -295,7 +285,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
do {
/* Check to see if there's a ping ACK to reply to. */
if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
- rxrpc_send_ack_packet(call, false);
+ rxrpc_send_ack_packet(call, false, NULL);
if (!skb) {
size_t size, chunk, max, space;
@@ -480,11 +470,11 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
if (msg->msg_flags & MSG_CMSG_COMPAT) {
if (len != sizeof(u32))
return -EINVAL;
- p->user_call_ID = *(u32 *)CMSG_DATA(cmsg);
+ p->call.user_call_ID = *(u32 *)CMSG_DATA(cmsg);
} else {
if (len != sizeof(unsigned long))
return -EINVAL;
- p->user_call_ID = *(unsigned long *)
+ p->call.user_call_ID = *(unsigned long *)
CMSG_DATA(cmsg);
}
got_user_ID = true;
@@ -522,11 +512,24 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
break;
case RXRPC_TX_LENGTH:
- if (p->tx_total_len != -1 || len != sizeof(__s64))
+ if (p->call.tx_total_len != -1 || len != sizeof(__s64))
+ return -EINVAL;
+ p->call.tx_total_len = *(__s64 *)CMSG_DATA(cmsg);
+ if (p->call.tx_total_len < 0)
return -EINVAL;
- p->tx_total_len = *(__s64 *)CMSG_DATA(cmsg);
- if (p->tx_total_len < 0)
+ break;
+
+ case RXRPC_SET_CALL_TIMEOUT:
+ if (len & 3 || len < 4 || len > 12)
return -EINVAL;
+ memcpy(&p->call.timeouts, CMSG_DATA(cmsg), len);
+ p->call.nr_timeouts = len / 4;
+ if (p->call.timeouts.hard > INT_MAX / HZ)
+ return -ERANGE;
+ if (p->call.nr_timeouts >= 2 && p->call.timeouts.idle > 60 * 60 * 1000)
+ return -ERANGE;
+ if (p->call.nr_timeouts >= 3 && p->call.timeouts.normal > 60 * 60 * 1000)
+ return -ERANGE;
break;
default:
@@ -536,7 +539,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
if (!got_user_ID)
return -EINVAL;
- if (p->tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA)
+ if (p->call.tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA)
return -EINVAL;
_leave(" = 0");
return 0;
@@ -576,8 +579,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
cp.exclusive = rx->exclusive | p->exclusive;
cp.upgrade = p->upgrade;
cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID,
- p->tx_total_len, GFP_KERNEL);
+ call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL);
/* The socket is now unlocked */
_leave(" = %p\n", call);
@@ -594,15 +596,17 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
{
enum rxrpc_call_state state;
struct rxrpc_call *call;
+ unsigned long now, j;
int ret;
struct rxrpc_send_params p = {
- .tx_total_len = -1,
- .user_call_ID = 0,
- .abort_code = 0,
- .command = RXRPC_CMD_SEND_DATA,
- .exclusive = false,
- .upgrade = true,
+ .call.tx_total_len = -1,
+ .call.user_call_ID = 0,
+ .call.nr_timeouts = 0,
+ .abort_code = 0,
+ .command = RXRPC_CMD_SEND_DATA,
+ .exclusive = false,
+ .upgrade = false,
};
_enter("");
@@ -615,15 +619,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = -EINVAL;
if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
goto error_release_sock;
- call = rxrpc_accept_call(rx, p.user_call_ID, NULL);
+ call = rxrpc_accept_call(rx, p.call.user_call_ID, NULL);
/* The socket is now unlocked. */
if (IS_ERR(call))
return PTR_ERR(call);
- rxrpc_put_call(call, rxrpc_call_put);
- return 0;
+ ret = 0;
+ goto out_put_unlock;
}
- call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID);
+ call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID);
if (!call) {
ret = -EBADSLT;
if (p.command != RXRPC_CMD_SEND_DATA)
@@ -653,14 +657,39 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
goto error_put;
}
- if (p.tx_total_len != -1) {
+ if (p.call.tx_total_len != -1) {
ret = -EINVAL;
if (call->tx_total_len != -1 ||
call->tx_pending ||
call->tx_top != 0)
goto error_put;
- call->tx_total_len = p.tx_total_len;
+ call->tx_total_len = p.call.tx_total_len;
+ }
+ }
+
+ switch (p.call.nr_timeouts) {
+ case 3:
+ j = msecs_to_jiffies(p.call.timeouts.normal);
+ if (p.call.timeouts.normal > 0 && j == 0)
+ j = 1;
+ WRITE_ONCE(call->next_rx_timo, j);
+ /* Fall through */
+ case 2:
+ j = msecs_to_jiffies(p.call.timeouts.idle);
+ if (p.call.timeouts.idle > 0 && j == 0)
+ j = 1;
+ WRITE_ONCE(call->next_req_timo, j);
+ /* Fall through */
+ case 1:
+ if (p.call.timeouts.hard > 0) {
+ j = msecs_to_jiffies(p.call.timeouts.hard);
+ now = jiffies;
+ j += now;
+ WRITE_ONCE(call->expect_term_by, j);
+ rxrpc_reduce_call_timer(call, j, now,
+ rxrpc_timer_set_for_hard);
}
+ break;
}
state = READ_ONCE(call->state);
@@ -689,6 +718,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = rxrpc_send_data(rx, call, msg, len, NULL);
}
+out_put_unlock:
mutex_unlock(&call->user_mutex);
error_put:
rxrpc_put_call(call, rxrpc_call_put);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 34c706d2f79c..4a7af7aff37d 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -21,6 +21,8 @@ static const unsigned int four = 4;
static const unsigned int thirtytwo = 32;
static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
+static const unsigned long one_jiffy = 1;
+static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
/*
* RxRPC operating parameters.
@@ -29,64 +31,60 @@ static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
* information on the individual parameters.
*/
static struct ctl_table rxrpc_sysctl_table[] = {
- /* Values measured in milliseconds */
+ /* Values measured in milliseconds but used in jiffies */
{
.procname = "req_ack_delay",
.data = &rxrpc_requested_ack_delay,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec,
- .extra1 = (void *)&zero,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
},
{
.procname = "soft_ack_delay",
.data = &rxrpc_soft_ack_delay,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec,
- .extra1 = (void *)&one,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
},
{
.procname = "idle_ack_delay",
.data = &rxrpc_idle_ack_delay,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec,
- .extra1 = (void *)&one,
- },
- {
- .procname = "resend_timeout",
- .data = &rxrpc_resend_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- .extra1 = (void *)&one,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
},
{
.procname = "idle_conn_expiry",
.data = &rxrpc_conn_idle_client_expiry,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
- .extra1 = (void *)&one,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
},
{
.procname = "idle_conn_fast_expiry",
.data = &rxrpc_conn_idle_client_fast_expiry,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
- .extra1 = (void *)&one,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
},
-
- /* Values measured in seconds but used in jiffies */
{
- .procname = "max_call_lifetime",
- .data = &rxrpc_max_call_lifetime,
- .maxlen = sizeof(unsigned int),
+ .procname = "resend_timeout",
+ .data = &rxrpc_resend_timeout,
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec,
- .extra1 = (void *)&one,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
},
/* Non-time values */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index c03d86a7775e..f24a6ae6819a 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -857,17 +857,14 @@ config NET_ACT_TUNNEL_KEY
config NET_IFE_SKBMARK
tristate "Support to encoding decoding skb mark on IFE action"
depends on NET_ACT_IFE
- ---help---
config NET_IFE_SKBPRIO
tristate "Support to encoding decoding skb prio on IFE action"
depends on NET_ACT_IFE
- ---help---
config NET_IFE_SKBTCINDEX
tristate "Support to encoding decoding skb tcindex on IFE action"
depends on NET_ACT_IFE
- ---help---
config NET_CLS_IND
bool "Incoming device classification"
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 4d33a50a8a6d..52622a3d2517 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -99,7 +99,7 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
p->tcfa_refcnt--;
if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
if (p->ops->cleanup)
- p->ops->cleanup(p, bind);
+ p->ops->cleanup(p);
tcf_idr_remove(p->idrinfo, p);
ret = ACT_P_DELETED;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5ef8ce8c83d4..b3f2c15affa7 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -357,7 +357,7 @@ out:
return ret;
}
-static void tcf_bpf_cleanup(struct tc_action *act, int bind)
+static void tcf_bpf_cleanup(struct tc_action *act)
{
struct tcf_bpf_cfg tmp;
@@ -401,16 +401,14 @@ static __net_init int bpf_init_net(struct net *net)
return tc_action_net_init(tn, &act_bpf_ops);
}
-static void __net_exit bpf_exit_net(struct net *net)
+static void __net_exit bpf_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, bpf_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, bpf_net_id);
}
static struct pernet_operations bpf_net_ops = {
.init = bpf_init_net,
- .exit = bpf_exit_net,
+ .exit_batch = bpf_exit_net,
.id = &bpf_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 10b7a8855a6c..2b15ba84e0c8 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -209,16 +209,14 @@ static __net_init int connmark_init_net(struct net *net)
return tc_action_net_init(tn, &act_connmark_ops);
}
-static void __net_exit connmark_exit_net(struct net *net)
+static void __net_exit connmark_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, connmark_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, connmark_net_id);
}
static struct pernet_operations connmark_net_ops = {
.init = connmark_init_net,
- .exit = connmark_exit_net,
+ .exit_batch = connmark_exit_net,
.id = &connmark_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d836f998117b..b7ba9b06b147 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -49,6 +49,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
int bind)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
+ struct tcf_csum_params *params_old, *params_new;
struct nlattr *tb[TCA_CSUM_MAX + 1];
struct tc_csum *parm;
struct tcf_csum *p;
@@ -67,7 +68,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
if (!tcf_idr_check(tn, parm->index, a, bind)) {
ret = tcf_idr_create(tn, parm->index, est, a,
- &act_csum_ops, bind, false);
+ &act_csum_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -80,10 +81,21 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
}
p = to_tcf_csum(*a);
- spin_lock_bh(&p->tcf_lock);
- p->tcf_action = parm->action;
- p->update_flags = parm->update_flags;
- spin_unlock_bh(&p->tcf_lock);
+ ASSERT_RTNL();
+
+ params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+ if (unlikely(!params_new)) {
+ if (ret == ACT_P_CREATED)
+ tcf_idr_release(*a, bind);
+ return -ENOMEM;
+ }
+ params_old = rtnl_dereference(p->params);
+
+ params_new->action = parm->action;
+ params_new->update_flags = parm->update_flags;
+ rcu_assign_pointer(p->params, params_new);
+ if (params_old)
+ kfree_rcu(params_old, rcu);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
@@ -539,19 +551,21 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_csum *p = to_tcf_csum(a);
- int action;
+ struct tcf_csum_params *params;
u32 update_flags;
+ int action;
+
+ rcu_read_lock();
+ params = rcu_dereference(p->params);
- spin_lock(&p->tcf_lock);
tcf_lastuse_update(&p->tcf_tm);
- bstats_update(&p->tcf_bstats, skb);
- action = p->tcf_action;
- update_flags = p->update_flags;
- spin_unlock(&p->tcf_lock);
+ bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
+ action = params->action;
if (unlikely(action == TC_ACT_SHOT))
- goto drop;
+ goto drop_stats;
+ update_flags = params->update_flags;
switch (tc_skb_protocol(skb)) {
case cpu_to_be16(ETH_P_IP):
if (!tcf_csum_ipv4(skb, update_flags))
@@ -563,13 +577,16 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
break;
}
+unlock:
+ rcu_read_unlock();
return action;
drop:
- spin_lock(&p->tcf_lock);
- p->tcf_qstats.drops++;
- spin_unlock(&p->tcf_lock);
- return TC_ACT_SHOT;
+ action = TC_ACT_SHOT;
+
+drop_stats:
+ qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
+ goto unlock;
}
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -577,15 +594,18 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_csum *p = to_tcf_csum(a);
+ struct tcf_csum_params *params;
struct tc_csum opt = {
- .update_flags = p->update_flags,
.index = p->tcf_index,
- .action = p->tcf_action,
.refcnt = p->tcf_refcnt - ref,
.bindcnt = p->tcf_bindcnt - bind,
};
struct tcf_t t;
+ params = rtnl_dereference(p->params);
+ opt.action = params->action;
+ opt.update_flags = params->update_flags;
+
if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -600,6 +620,15 @@ nla_put_failure:
return -1;
}
+static void tcf_csum_cleanup(struct tc_action *a)
+{
+ struct tcf_csum *p = to_tcf_csum(a);
+ struct tcf_csum_params *params;
+
+ params = rcu_dereference_protected(p->params, 1);
+ kfree_rcu(params, rcu);
+}
+
static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
const struct tc_action_ops *ops)
@@ -623,6 +652,7 @@ static struct tc_action_ops act_csum_ops = {
.act = tcf_csum,
.dump = tcf_csum_dump,
.init = tcf_csum_init,
+ .cleanup = tcf_csum_cleanup,
.walk = tcf_csum_walker,
.lookup = tcf_csum_search,
.size = sizeof(struct tcf_csum),
@@ -635,16 +665,14 @@ static __net_init int csum_init_net(struct net *net)
return tc_action_net_init(tn, &act_csum_ops);
}
-static void __net_exit csum_exit_net(struct net *net)
+static void __net_exit csum_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, csum_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, csum_net_id);
}
static struct pernet_operations csum_net_ops = {
.init = csum_init_net,
- .exit = csum_exit_net,
+ .exit_batch = csum_exit_net,
.id = &csum_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e29a48ef7fc3..b56986d41c87 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
if (action == TC_ACT_SHOT)
this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
- tm->lastuse = lastuse;
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
@@ -235,16 +235,14 @@ static __net_init int gact_init_net(struct net *net)
return tc_action_net_init(tn, &act_gact_ops);
}
-static void __net_exit gact_exit_net(struct net *net)
+static void __net_exit gact_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, gact_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, gact_net_id);
}
static struct pernet_operations gact_net_ops = {
.init = gact_init_net,
- .exit = gact_exit_net,
+ .exit_batch = gact_exit_net,
.id = &gact_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 3007cb1310ea..5954e992685a 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -387,7 +387,7 @@ out_nlmsg_trim:
}
/* under ife->tcf_lock */
-static void _tcf_ife_cleanup(struct tc_action *a, int bind)
+static void _tcf_ife_cleanup(struct tc_action *a)
{
struct tcf_ife_info *ife = to_ife(a);
struct tcf_meta_info *e, *n;
@@ -405,13 +405,13 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind)
}
}
-static void tcf_ife_cleanup(struct tc_action *a, int bind)
+static void tcf_ife_cleanup(struct tc_action *a)
{
struct tcf_ife_info *ife = to_ife(a);
struct tcf_ife_params *p;
spin_lock_bh(&ife->tcf_lock);
- _tcf_ife_cleanup(a, bind);
+ _tcf_ife_cleanup(a);
spin_unlock_bh(&ife->tcf_lock);
p = rcu_dereference_protected(ife->params, 1);
@@ -546,7 +546,7 @@ metadata_parse_err:
if (exists)
tcf_idr_release(*a, bind);
if (ret == ACT_P_CREATED)
- _tcf_ife_cleanup(*a, bind);
+ _tcf_ife_cleanup(*a);
if (exists)
spin_unlock_bh(&ife->tcf_lock);
@@ -567,7 +567,7 @@ metadata_parse_err:
err = use_all_metadata(ife);
if (err) {
if (ret == ACT_P_CREATED)
- _tcf_ife_cleanup(*a, bind);
+ _tcf_ife_cleanup(*a);
if (exists)
spin_unlock_bh(&ife->tcf_lock);
@@ -858,16 +858,14 @@ static __net_init int ife_init_net(struct net *net)
return tc_action_net_init(tn, &act_ife_ops);
}
-static void __net_exit ife_exit_net(struct net *net)
+static void __net_exit ife_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, ife_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, ife_net_id);
}
static struct pernet_operations ife_net_ops = {
.init = ife_init_net,
- .exit = ife_exit_net,
+ .exit_batch = ife_exit_net,
.id = &ife_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d9e399a7e3d5..06e380ae0928 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -77,7 +77,7 @@ static void ipt_destroy_target(struct xt_entry_target *t)
module_put(par.target->me);
}
-static void tcf_ipt_release(struct tc_action *a, int bind)
+static void tcf_ipt_release(struct tc_action *a)
{
struct tcf_ipt *ipt = to_ipt(a);
ipt_destroy_target(ipt->tcfi_t);
@@ -337,16 +337,14 @@ static __net_init int ipt_init_net(struct net *net)
return tc_action_net_init(tn, &act_ipt_ops);
}
-static void __net_exit ipt_exit_net(struct net *net)
+static void __net_exit ipt_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, ipt_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, ipt_net_id);
}
static struct pernet_operations ipt_net_ops = {
.init = ipt_init_net,
- .exit = ipt_exit_net,
+ .exit_batch = ipt_exit_net,
.id = &ipt_net_id,
.size = sizeof(struct tc_action_net),
};
@@ -387,16 +385,14 @@ static __net_init int xt_init_net(struct net *net)
return tc_action_net_init(tn, &act_xt_ops);
}
-static void __net_exit xt_exit_net(struct net *net)
+static void __net_exit xt_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, xt_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, xt_net_id);
}
static struct pernet_operations xt_net_ops = {
.init = xt_init_net,
- .exit = xt_exit_net,
+ .exit_batch = xt_exit_net,
.id = &xt_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 1e3f10e5da99..6445184b2759 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -22,7 +22,6 @@
#include <net/pkt_sched.h>
#include <uapi/linux/tc_act/tc_ife.h>
#include <net/tc_act/tc_ife.h>
-#include <linux/rtnetlink.h>
static int skbmark_encode(struct sk_buff *skb, void *skbdata,
struct tcf_meta_info *e)
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 2ea1f26c9e96..7221437ca3a6 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -22,7 +22,6 @@
#include <net/pkt_sched.h>
#include <uapi/linux/tc_act/tc_ife.h>
#include <net/tc_act/tc_ife.h>
-#include <linux/rtnetlink.h>
static int skbtcindex_encode(struct sk_buff *skb, void *skbdata,
struct tcf_meta_info *e)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 8b3e59388480..e6ff88f72900 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -29,7 +29,6 @@
#include <net/tc_act/tc_mirred.h>
static LIST_HEAD(mirred_list);
-static DEFINE_SPINLOCK(mirred_list_lock);
static bool tcf_mirred_is_act_redirect(int action)
{
@@ -50,18 +49,15 @@ static bool tcf_mirred_act_wants_ingress(int action)
}
}
-static void tcf_mirred_release(struct tc_action *a, int bind)
+static void tcf_mirred_release(struct tc_action *a)
{
struct tcf_mirred *m = to_mirred(a);
struct net_device *dev;
- /* We could be called either in a RCU callback or with RTNL lock held. */
- spin_lock_bh(&mirred_list_lock);
list_del(&m->tcfm_list);
- dev = rcu_dereference_protected(m->tcfm_dev, 1);
+ dev = rtnl_dereference(m->tcfm_dev);
if (dev)
dev_put(dev);
- spin_unlock_bh(&mirred_list_lock);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -139,8 +135,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
m->tcf_action = parm->action;
m->tcfm_eaction = parm->eaction;
if (dev != NULL) {
- m->tcfm_ifindex = parm->ifindex;
- m->net = net;
if (ret != ACT_P_CREATED)
dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
dev_hold(dev);
@@ -149,9 +143,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
if (ret == ACT_P_CREATED) {
- spin_lock_bh(&mirred_list_lock);
list_add(&m->tcfm_list, &mirred_list);
- spin_unlock_bh(&mirred_list_lock);
tcf_idr_insert(tn, *a);
}
@@ -239,7 +231,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
struct tcf_t *tm = &m->tcf_tm;
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- tm->lastuse = lastuse;
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -247,13 +239,14 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_mirred *m = to_mirred(a);
+ struct net_device *dev = rtnl_dereference(m->tcfm_dev);
struct tc_mirred opt = {
.index = m->tcf_index,
.action = m->tcf_action,
.refcnt = m->tcf_refcnt - ref,
.bindcnt = m->tcf_bindcnt - bind,
.eaction = m->tcfm_eaction,
- .ifindex = m->tcfm_ifindex,
+ .ifindex = dev ? dev->ifindex : 0,
};
struct tcf_t t;
@@ -294,7 +287,6 @@ static int mirred_device_event(struct notifier_block *unused,
ASSERT_RTNL();
if (event == NETDEV_UNREGISTER) {
- spin_lock_bh(&mirred_list_lock);
list_for_each_entry(m, &mirred_list, tcfm_list) {
if (rcu_access_pointer(m->tcfm_dev) == dev) {
dev_put(dev);
@@ -304,7 +296,6 @@ static int mirred_device_event(struct notifier_block *unused,
RCU_INIT_POINTER(m->tcfm_dev, NULL);
}
}
- spin_unlock_bh(&mirred_list_lock);
}
return NOTIFY_DONE;
@@ -318,7 +309,7 @@ static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
{
struct tcf_mirred *m = to_mirred(a);
- return __dev_get_by_index(m->net, m->tcfm_ifindex);
+ return rtnl_dereference(m->tcfm_dev);
}
static struct tc_action_ops act_mirred_ops = {
@@ -343,16 +334,14 @@ static __net_init int mirred_init_net(struct net *net)
return tc_action_net_init(tn, &act_mirred_ops);
}
-static void __net_exit mirred_exit_net(struct net *net)
+static void __net_exit mirred_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, mirred_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, mirred_net_id);
}
static struct pernet_operations mirred_net_ops = {
.init = mirred_init_net,
- .exit = mirred_exit_net,
+ .exit_batch = mirred_exit_net,
.id = &mirred_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c365d01b99c8..98c6a4b2f523 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -310,16 +310,14 @@ static __net_init int nat_init_net(struct net *net)
return tc_action_net_init(tn, &act_nat_ops);
}
-static void __net_exit nat_exit_net(struct net *net)
+static void __net_exit nat_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, nat_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, nat_net_id);
}
static struct pernet_operations nat_net_ops = {
.init = nat_init_net,
- .exit = nat_exit_net,
+ .exit_batch = nat_exit_net,
.id = &nat_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 491fe5deb09e..349beaffb29e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -216,7 +216,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
return ret;
}
-static void tcf_pedit_cleanup(struct tc_action *a, int bind)
+static void tcf_pedit_cleanup(struct tc_action *a)
{
struct tcf_pedit *p = to_pedit(a);
struct tc_pedit_key *keys = p->tcfp_keys;
@@ -453,16 +453,14 @@ static __net_init int pedit_init_net(struct net *net)
return tc_action_net_init(tn, &act_pedit_ops);
}
-static void __net_exit pedit_exit_net(struct net *net)
+static void __net_exit pedit_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, pedit_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, pedit_net_id);
}
static struct pernet_operations pedit_net_ops = {
.init = pedit_init_net,
- .exit = pedit_exit_net,
+ .exit_batch = pedit_exit_net,
.id = &pedit_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 3bb2ebf9e9ae..95d3c9097b25 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -118,13 +118,13 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
police = to_police(*a);
if (parm->rate.rate) {
err = -ENOMEM;
- R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
+ R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE], NULL);
if (R_tab == NULL)
goto failure;
if (parm->peakrate.rate) {
P_tab = qdisc_get_rtab(&parm->peakrate,
- tb[TCA_POLICE_PEAKRATE]);
+ tb[TCA_POLICE_PEAKRATE], NULL);
if (P_tab == NULL)
goto failure;
}
@@ -334,16 +334,14 @@ static __net_init int police_init_net(struct net *net)
return tc_action_net_init(tn, &act_police_ops);
}
-static void __net_exit police_exit_net(struct net *net)
+static void __net_exit police_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, police_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, police_net_id);
}
static struct pernet_operations police_net_ops = {
.init = police_init_net,
- .exit = police_exit_net,
+ .exit_batch = police_exit_net,
.id = &police_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 8b5abcd2f32f..1ba0df238756 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -96,23 +96,16 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
return ret;
}
-static void tcf_sample_cleanup_rcu(struct rcu_head *rcu)
+static void tcf_sample_cleanup(struct tc_action *a)
{
- struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu);
+ struct tcf_sample *s = to_sample(a);
struct psample_group *psample_group;
- psample_group = rcu_dereference_protected(s->psample_group, 1);
+ psample_group = rtnl_dereference(s->psample_group);
RCU_INIT_POINTER(s->psample_group, NULL);
psample_group_put(psample_group);
}
-static void tcf_sample_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_sample *s = to_sample(a);
-
- call_rcu(&s->rcu, tcf_sample_cleanup_rcu);
-}
-
static bool tcf_sample_dev_ok_push(struct net_device *dev)
{
switch (dev->type) {
@@ -243,16 +236,14 @@ static __net_init int sample_init_net(struct net *net)
return tc_action_net_init(tn, &act_sample_ops);
}
-static void __net_exit sample_exit_net(struct net *net)
+static void __net_exit sample_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, sample_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, sample_net_id);
}
static struct pernet_operations sample_net_ops = {
.init = sample_init_net,
- .exit = sample_exit_net,
+ .exit_batch = sample_exit_net,
.id = &sample_net_id,
.size = sizeof(struct tc_action_net),
};
@@ -264,7 +255,6 @@ static int __init sample_init_module(void)
static void __exit sample_cleanup_module(void)
{
- rcu_barrier();
tcf_unregister_action(&act_sample_ops, &sample_net_ops);
}
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e7b57e5071a3..425eac11f6da 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -47,7 +47,7 @@ static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
return d->tcf_action;
}
-static void tcf_simp_release(struct tc_action *a, int bind)
+static void tcf_simp_release(struct tc_action *a)
{
struct tcf_defact *d = to_defact(a);
kfree(d->tcfd_defdata);
@@ -204,16 +204,14 @@ static __net_init int simp_init_net(struct net *net)
return tc_action_net_init(tn, &act_simp_ops);
}
-static void __net_exit simp_exit_net(struct net *net)
+static void __net_exit simp_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, simp_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, simp_net_id);
}
static struct pernet_operations simp_net_ops = {
.init = simp_init_net,
- .exit = simp_exit_net,
+ .exit_batch = simp_exit_net,
.id = &simp_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 59949d61f20d..5a3f691bb545 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -241,16 +241,14 @@ static __net_init int skbedit_init_net(struct net *net)
return tc_action_net_init(tn, &act_skbedit_ops);
}
-static void __net_exit skbedit_exit_net(struct net *net)
+static void __net_exit skbedit_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, skbedit_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, skbedit_net_id);
}
static struct pernet_operations skbedit_net_ops = {
.init = skbedit_init_net,
- .exit = skbedit_exit_net,
+ .exit_batch = skbedit_exit_net,
.id = &skbedit_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index b642ad3d39dd..fa975262dbac 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -184,7 +184,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
return ret;
}
-static void tcf_skbmod_cleanup(struct tc_action *a, int bind)
+static void tcf_skbmod_cleanup(struct tc_action *a)
{
struct tcf_skbmod *d = to_skbmod(a);
struct tcf_skbmod_params *p;
@@ -266,16 +266,14 @@ static __net_init int skbmod_init_net(struct net *net)
return tc_action_net_init(tn, &act_skbmod_ops);
}
-static void __net_exit skbmod_exit_net(struct net *net)
+static void __net_exit skbmod_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, skbmod_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, skbmod_net_id);
}
static struct pernet_operations skbmod_net_ops = {
.init = skbmod_init_net,
- .exit = skbmod_exit_net,
+ .exit_batch = skbmod_exit_net,
.id = &skbmod_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 30c96274c638..0e23aac09ad6 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -201,7 +201,7 @@ err_out:
return ret;
}
-static void tunnel_key_release(struct tc_action *a, int bind)
+static void tunnel_key_release(struct tc_action *a)
{
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
@@ -325,16 +325,14 @@ static __net_init int tunnel_key_init_net(struct net *net)
return tc_action_net_init(tn, &act_tunnel_key_ops);
}
-static void __net_exit tunnel_key_exit_net(struct net *net)
+static void __net_exit tunnel_key_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, tunnel_key_net_id);
}
static struct pernet_operations tunnel_key_net_ops = {
.init = tunnel_key_init_net,
- .exit = tunnel_key_exit_net,
+ .exit_batch = tunnel_key_exit_net,
.id = &tunnel_key_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 97f717a13ad5..e1a1b3f3983a 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -219,7 +219,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
return ret;
}
-static void tcf_vlan_cleanup(struct tc_action *a, int bind)
+static void tcf_vlan_cleanup(struct tc_action *a)
{
struct tcf_vlan *v = to_vlan(a);
struct tcf_vlan_params *p;
@@ -301,16 +301,14 @@ static __net_init int vlan_init_net(struct net *net)
return tc_action_net_init(tn, &act_vlan_ops);
}
-static void __net_exit vlan_exit_net(struct net *net)
+static void __net_exit vlan_exit_net(struct list_head *net_list)
{
- struct tc_action_net *tn = net_generic(net, vlan_net_id);
-
- tc_action_net_exit(tn);
+ tc_action_net_exit(net_list, vlan_net_id);
}
static struct pernet_operations vlan_net_ops = {
.init = vlan_init_net,
- .exit = vlan_exit_net,
+ .exit_batch = vlan_exit_net,
.id = &vlan_net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 7d97f612c9b9..bcb4ccb5f894 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -23,8 +23,8 @@
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
-#include <linux/err.h>
#include <linux/slab.h>
+#include <linux/idr.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
@@ -122,8 +122,8 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
}
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
- u32 prio, u32 parent, struct Qdisc *q,
- struct tcf_chain *chain)
+ u32 prio, struct tcf_chain *chain,
+ struct netlink_ext_ack *extack)
{
struct tcf_proto *tp;
int err;
@@ -149,6 +149,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
module_put(tp->ops->owner);
err = -EAGAIN;
} else {
+ NL_SET_ERR_MSG(extack, "TC classifier not found");
err = -ENOENT;
}
goto errout;
@@ -157,8 +158,6 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
tp->classify = tp->ops->classify;
tp->protocol = protocol;
tp->prio = prio;
- tp->classid = parent;
- tp->q = q;
tp->chain = chain;
err = tp->ops->init(tp);
@@ -173,13 +172,20 @@ errout:
return ERR_PTR(err);
}
-static void tcf_proto_destroy(struct tcf_proto *tp)
+static void tcf_proto_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
- tp->ops->destroy(tp);
+ tp->ops->destroy(tp, extack);
module_put(tp->ops->owner);
kfree_rcu(tp, rcu);
}
+struct tcf_filter_chain_list_item {
+ struct list_head list;
+ tcf_chain_head_change_t *chain_head_change;
+ void *chain_head_change_priv;
+};
+
static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
u32 chain_index)
{
@@ -188,6 +194,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (!chain)
return NULL;
+ INIT_LIST_HEAD(&chain->filter_chain_list);
list_add_tail(&chain->list, &block->chain_list);
chain->block = block;
chain->index = chain_index;
@@ -195,12 +202,19 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
return chain;
}
+static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
+ struct tcf_proto *tp_head)
+{
+ if (item->chain_head_change)
+ item->chain_head_change(tp_head, item->chain_head_change_priv);
+}
static void tcf_chain_head_change(struct tcf_chain *chain,
struct tcf_proto *tp_head)
{
- if (chain->chain_head_change)
- chain->chain_head_change(tp_head,
- chain->chain_head_change_priv);
+ struct tcf_filter_chain_list_item *item;
+
+ list_for_each_entry(item, &chain->filter_chain_list, list)
+ tcf_chain_head_change_item(item, tp_head);
}
static void tcf_chain_flush(struct tcf_chain *chain)
@@ -210,7 +224,7 @@ static void tcf_chain_flush(struct tcf_chain *chain)
tcf_chain_head_change(chain, NULL);
while (tp) {
RCU_INIT_POINTER(chain->filter_chain, tp->next);
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, NULL);
tp = rtnl_dereference(chain->filter_chain);
tcf_chain_put(chain);
}
@@ -218,8 +232,12 @@ static void tcf_chain_flush(struct tcf_chain *chain)
static void tcf_chain_destroy(struct tcf_chain *chain)
{
+ struct tcf_block *block = chain->block;
+
list_del(&chain->list);
kfree(chain);
+ if (list_empty(&block->chain_list))
+ kfree(block);
}
static void tcf_chain_hold(struct tcf_chain *chain)
@@ -250,62 +268,300 @@ void tcf_chain_put(struct tcf_chain *chain)
}
EXPORT_SYMBOL(tcf_chain_put);
-static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
- struct tcf_block_ext_info *ei,
- enum tc_block_command command)
+static bool tcf_block_offload_in_use(struct tcf_block *block)
+{
+ return block->offloadcnt;
+}
+
+static int tcf_block_offload_cmd(struct tcf_block *block,
+ struct net_device *dev,
+ struct tcf_block_ext_info *ei,
+ enum tc_block_command command)
{
- struct net_device *dev = q->dev_queue->dev;
struct tc_block_offload bo = {};
- if (!dev->netdev_ops->ndo_setup_tc)
- return;
bo.command = command;
bo.binder_type = ei->binder_type;
bo.block = block;
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+ return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
}
-static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
- struct tcf_block_ext_info *ei)
+static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
{
- tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
+ struct net_device *dev = q->dev_queue->dev;
+ int err;
+
+ if (!dev->netdev_ops->ndo_setup_tc)
+ goto no_offload_dev_inc;
+
+ /* If tc offload feature is disabled and the block we try to bind
+ * to already has some offloaded filters, forbid to bind.
+ */
+ if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
+ return -EOPNOTSUPP;
+
+ err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
+ if (err == -EOPNOTSUPP)
+ goto no_offload_dev_inc;
+ return err;
+
+no_offload_dev_inc:
+ if (tcf_block_offload_in_use(block))
+ return -EOPNOTSUPP;
+ block->nooffloaddevcnt++;
+ return 0;
}
static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei)
{
- tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
+ struct net_device *dev = q->dev_queue->dev;
+ int err;
+
+ if (!dev->netdev_ops->ndo_setup_tc)
+ goto no_offload_dev_dec;
+ err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND);
+ if (err == -EOPNOTSUPP)
+ goto no_offload_dev_dec;
+ return;
+
+no_offload_dev_dec:
+ WARN_ON(block->nooffloaddevcnt-- == 0);
}
-int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
- struct tcf_block_ext_info *ei)
+static int
+tcf_chain_head_change_cb_add(struct tcf_chain *chain,
+ struct tcf_block_ext_info *ei,
+ struct netlink_ext_ack *extack)
+{
+ struct tcf_filter_chain_list_item *item;
+
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item) {
+ NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
+ return -ENOMEM;
+ }
+ item->chain_head_change = ei->chain_head_change;
+ item->chain_head_change_priv = ei->chain_head_change_priv;
+ if (chain->filter_chain)
+ tcf_chain_head_change_item(item, chain->filter_chain);
+ list_add(&item->list, &chain->filter_chain_list);
+ return 0;
+}
+
+static void
+tcf_chain_head_change_cb_del(struct tcf_chain *chain,
+ struct tcf_block_ext_info *ei)
+{
+ struct tcf_filter_chain_list_item *item;
+
+ list_for_each_entry(item, &chain->filter_chain_list, list) {
+ if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
+ (item->chain_head_change == ei->chain_head_change &&
+ item->chain_head_change_priv == ei->chain_head_change_priv)) {
+ tcf_chain_head_change_item(item, NULL);
+ list_del(&item->list);
+ kfree(item);
+ return;
+ }
+ }
+ WARN_ON(1);
+}
+
+struct tcf_net {
+ struct idr idr;
+};
+
+static unsigned int tcf_net_id;
+
+static int tcf_block_insert(struct tcf_block *block, struct net *net,
+ u32 block_index, struct netlink_ext_ack *extack)
+{
+ struct tcf_net *tn = net_generic(net, tcf_net_id);
+ int err;
+
+ err = idr_alloc_ext(&tn->idr, block, NULL, block_index,
+ block_index + 1, GFP_KERNEL);
+ if (err)
+ return err;
+ block->index = block_index;
+ return 0;
+}
+
+static void tcf_block_remove(struct tcf_block *block, struct net *net)
+{
+ struct tcf_net *tn = net_generic(net, tcf_net_id);
+
+ idr_remove_ext(&tn->idr, block->index);
+}
+
+static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
+ struct netlink_ext_ack *extack)
{
- struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
+ struct tcf_block *block;
struct tcf_chain *chain;
int err;
- if (!block)
- return -ENOMEM;
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (!block) {
+ NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
+ return ERR_PTR(-ENOMEM);
+ }
INIT_LIST_HEAD(&block->chain_list);
INIT_LIST_HEAD(&block->cb_list);
+ INIT_LIST_HEAD(&block->owner_list);
/* Create chain 0 by default, it has to be always present. */
chain = tcf_chain_create(block, 0);
if (!chain) {
+ NL_SET_ERR_MSG(extack, "Failed to create new tcf chain");
err = -ENOMEM;
goto err_chain_create;
}
- WARN_ON(!ei->chain_head_change);
- chain->chain_head_change = ei->chain_head_change;
- chain->chain_head_change_priv = ei->chain_head_change_priv;
block->net = qdisc_net(q);
+ block->refcnt = 1;
+ block->net = net;
block->q = q;
- tcf_block_offload_bind(block, q, ei);
- *p_block = block;
- return 0;
+ return block;
err_chain_create:
kfree(block);
+ return ERR_PTR(err);
+}
+
+static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
+{
+ struct tcf_net *tn = net_generic(net, tcf_net_id);
+
+ return idr_find_ext(&tn->idr, block_index);
+}
+
+static struct tcf_chain *tcf_block_chain_zero(struct tcf_block *block)
+{
+ return list_first_entry(&block->chain_list, struct tcf_chain, list);
+}
+
+struct tcf_block_owner_item {
+ struct list_head list;
+ struct Qdisc *q;
+ enum tcf_block_binder_type binder_type;
+};
+
+static void
+tcf_block_owner_netif_keep_dst(struct tcf_block *block,
+ struct Qdisc *q,
+ enum tcf_block_binder_type binder_type)
+{
+ if (block->keep_dst &&
+ binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
+ binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+ netif_keep_dst(qdisc_dev(q));
+}
+
+void tcf_block_netif_keep_dst(struct tcf_block *block)
+{
+ struct tcf_block_owner_item *item;
+
+ block->keep_dst = true;
+ list_for_each_entry(item, &block->owner_list, list)
+ tcf_block_owner_netif_keep_dst(block, item->q,
+ item->binder_type);
+}
+EXPORT_SYMBOL(tcf_block_netif_keep_dst);
+
+static int tcf_block_owner_add(struct tcf_block *block,
+ struct Qdisc *q,
+ enum tcf_block_binder_type binder_type)
+{
+ struct tcf_block_owner_item *item;
+
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+ item->q = q;
+ item->binder_type = binder_type;
+ list_add(&item->list, &block->owner_list);
+ return 0;
+}
+
+static void tcf_block_owner_del(struct tcf_block *block,
+ struct Qdisc *q,
+ enum tcf_block_binder_type binder_type)
+{
+ struct tcf_block_owner_item *item;
+
+ list_for_each_entry(item, &block->owner_list, list) {
+ if (item->q == q && item->binder_type == binder_type) {
+ list_del(&item->list);
+ kfree(item);
+ return;
+ }
+ }
+ WARN_ON(1);
+}
+
+int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = qdisc_net(q);
+ struct tcf_block *block = NULL;
+ bool created = false;
+ int err;
+
+ if (ei->block_index) {
+ /* block_index not 0 means the shared block is requested */
+ block = tcf_block_lookup(net, ei->block_index);
+ if (block)
+ block->refcnt++;
+ }
+
+ if (!block) {
+ block = tcf_block_create(net, q, extack);
+ if (IS_ERR(block))
+ return PTR_ERR(block);
+ created = true;
+ if (ei->block_index) {
+ err = tcf_block_insert(block, net,
+ ei->block_index, extack);
+ if (err)
+ goto err_block_insert;
+ }
+ }
+
+ err = tcf_block_owner_add(block, q, ei->binder_type);
+ if (err)
+ goto err_block_owner_add;
+
+ tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);
+
+ err = tcf_chain_head_change_cb_add(tcf_block_chain_zero(block),
+ ei, extack);
+ if (err)
+ goto err_chain_head_change_cb_add;
+
+ err = tcf_block_offload_bind(block, q, ei);
+ if (err)
+ goto err_block_offload_bind;
+
+ *p_block = block;
+ return 0;
+
+err_block_offload_bind:
+ tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
+err_chain_head_change_cb_add:
+ tcf_block_owner_del(block, q, ei->binder_type);
+err_block_owner_add:
+ if (created) {
+ if (tcf_block_shared(block))
+ tcf_block_remove(block, net);
+err_block_insert:
+ kfree(tcf_block_chain_zero(block));
+ kfree(block);
+ } else {
+ block->refcnt--;
+ }
return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);
@@ -318,7 +574,8 @@ static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
}
int tcf_block_get(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+ struct netlink_ext_ack *extack)
{
struct tcf_block_ext_info ei = {
.chain_head_change = tcf_chain_head_change_dflt,
@@ -326,44 +583,47 @@ int tcf_block_get(struct tcf_block **p_block,
};
WARN_ON(!p_filter_chain);
- return tcf_block_get_ext(p_block, q, &ei);
+ return tcf_block_get_ext(p_block, q, &ei, extack);
}
EXPORT_SYMBOL(tcf_block_get);
-static void tcf_block_put_final(struct work_struct *work)
-{
- struct tcf_block *block = container_of(work, struct tcf_block, work);
- struct tcf_chain *chain, *tmp;
-
- rtnl_lock();
- /* Only chain 0 should be still here. */
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
- tcf_chain_put(chain);
- rtnl_unlock();
- kfree(block);
-}
-
/* XXX: Standalone actions are not allowed to jump to any chain, and bound
- * actions should be all removed after flushing. However, filters are now
- * destroyed in tc filter workqueue with RTNL lock, they can not race here.
+ * actions should be all removed after flushing.
*/
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei)
{
struct tcf_chain *chain, *tmp;
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
- tcf_chain_flush(chain);
+ if (!block)
+ return;
+ tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
+ tcf_block_owner_del(block, q, ei->binder_type);
+
+ if (--block->refcnt == 0) {
+ if (tcf_block_shared(block))
+ tcf_block_remove(block, block->net);
+
+ /* Hold a refcnt for all chains, so that they don't disappear
+ * while we are iterating.
+ */
+ list_for_each_entry(chain, &block->chain_list, list)
+ tcf_chain_hold(chain);
+
+ list_for_each_entry(chain, &block->chain_list, list)
+ tcf_chain_flush(chain);
+ }
tcf_block_offload_unbind(block, q, ei);
- INIT_WORK(&block->work, tcf_block_put_final);
- /* Wait for existing RCU callbacks to cool down, make sure their works
- * have been queued before this. We can not flush pending works here
- * because we are holding the RTNL lock.
- */
- rcu_barrier();
- tcf_queue_work(&block->work);
+ if (block->refcnt == 0) {
+ /* At this point, all the chains should have refcnt >= 1. */
+ list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+ tcf_chain_put(chain);
+
+ /* Finally, put chain 0 and allow block to be freed. */
+ tcf_chain_put(tcf_block_chain_zero(block));
+ }
}
EXPORT_SYMBOL(tcf_block_put_ext);
@@ -421,9 +681,16 @@ struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
{
struct tcf_block_cb *block_cb;
+ /* At this point, playback of previous block cb calls is not supported,
+ * so forbid to register to block which already has some offloaded
+ * filters present.
+ */
+ if (tcf_block_offload_in_use(block))
+ return ERR_PTR(-EOPNOTSUPP);
+
block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
if (!block_cb)
- return NULL;
+ return ERR_PTR(-ENOMEM);
block_cb->cb = cb;
block_cb->cb_ident = cb_ident;
block_cb->cb_priv = cb_priv;
@@ -439,7 +706,7 @@ int tcf_block_cb_register(struct tcf_block *block,
struct tcf_block_cb *block_cb;
block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
- return block_cb ? 0 : -ENOMEM;
+ return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
}
EXPORT_SYMBOL(tcf_block_cb_register);
@@ -469,6 +736,10 @@ static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
int ok_count = 0;
int err;
+ /* Make sure all netdevs sharing this block are offload-capable. */
+ if (block->nooffloaddevcnt && err_stop)
+ return -EOPNOTSUPP;
+
list_for_each_entry(block_cb, &block->cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err) {
@@ -522,8 +793,9 @@ reclassify:
#ifdef CONFIG_NET_CLS_ACT
reset:
if (unlikely(limit++ >= max_reclassify_loop)) {
- net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
- tp->q->ops->id, tp->prio & 0xffff,
+ net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
+ tp->chain->block->index,
+ tp->prio & 0xffff,
ntohs(tp->protocol));
return TC_ACT_SHOT;
}
@@ -596,8 +868,9 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
}
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
- struct tcf_proto *tp, struct Qdisc *q, u32 parent,
- void *fh, u32 portid, u32 seq, u16 flags, int event)
+ struct tcf_proto *tp, struct tcf_block *block,
+ struct Qdisc *q, u32 parent, void *fh,
+ u32 portid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -610,8 +883,13 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
tcm->tcm_family = AF_UNSPEC;
tcm->tcm__pad1 = 0;
tcm->tcm__pad2 = 0;
- tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
- tcm->tcm_parent = parent;
+ if (q) {
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+ tcm->tcm_parent = parent;
+ } else {
+ tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
+ tcm->tcm_block_index = block->index;
+ }
tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
goto nla_put_failure;
@@ -634,8 +912,8 @@ nla_put_failure:
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
- struct Qdisc *q, u32 parent,
- void *fh, int event, bool unicast)
+ struct tcf_block *block, struct Qdisc *q,
+ u32 parent, void *fh, int event, bool unicast)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -644,8 +922,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
- n->nlmsg_flags, event) <= 0) {
+ if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
+ n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -659,8 +937,9 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
- struct Qdisc *q, u32 parent,
- void *fh, bool unicast, bool *last)
+ struct tcf_block *block, struct Qdisc *q,
+ u32 parent, void *fh, bool unicast, bool *last,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -670,13 +949,14 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
- n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+ if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
+ n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to build del event notification");
kfree_skb(skb);
return -EINVAL;
}
- err = tp->ops->delete(tp, fh, last);
+ err = tp->ops->delete(tp, fh, last, extack);
if (err) {
kfree_skb(skb);
return err;
@@ -685,20 +965,24 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (unicast)
return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+ if (err < 0)
+ NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
+ return err;
}
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
- struct Qdisc *q, u32 parent,
- struct nlmsghdr *n,
+ struct tcf_block *block, struct Qdisc *q,
+ u32 parent, struct nlmsghdr *n,
struct tcf_chain *chain, int event)
{
struct tcf_proto *tp;
for (tp = rtnl_dereference(chain->filter_chain);
tp; tp = rtnl_dereference(tp->next))
- tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false);
+ tfilter_notify(net, oskb, n, tp, block,
+ q, parent, 0, event, false);
}
/* Add/change/delete/get a filter node */
@@ -714,13 +998,11 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
bool prio_allocate;
u32 parent;
u32 chain_index;
- struct net_device *dev;
- struct Qdisc *q;
+ struct Qdisc *q = NULL;
struct tcf_chain_info chain_info;
struct tcf_chain *chain = NULL;
struct tcf_block *block;
struct tcf_proto *tp;
- const struct Qdisc_class_ops *cops;
unsigned long cl;
void *fh;
int err;
@@ -747,8 +1029,10 @@ replay:
if (prio == 0) {
switch (n->nlmsg_type) {
case RTM_DELTFILTER:
- if (protocol || t->tcm_handle || tca[TCA_KIND])
+ if (protocol || t->tcm_handle || tca[TCA_KIND]) {
+ NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
return -ENOENT;
+ }
break;
case RTM_NEWTFILTER:
/* If no priority is provided by the user,
@@ -761,63 +1045,91 @@ replay:
}
/* fall-through */
default:
+ NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
return -ENOENT;
}
}
/* Find head of filter chain. */
- /* Find link */
- dev = __dev_get_by_index(net, t->tcm_ifindex);
- if (dev == NULL)
- return -ENODEV;
-
- /* Find qdisc */
- if (!parent) {
- q = dev->qdisc;
- parent = q->handle;
+ if (t->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+ block = tcf_block_lookup(net, t->tcm_block_index);
+ if (!block) {
+ NL_SET_ERR_MSG(extack, "Block of given index was not found");
+ err = -EINVAL;
+ goto errout;
+ }
} else {
- q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
- if (q == NULL)
- return -EINVAL;
- }
+ const struct Qdisc_class_ops *cops;
+ struct net_device *dev;
- /* Is it classful? */
- cops = q->ops->cl_ops;
- if (!cops)
- return -EINVAL;
+ /* Find link */
+ dev = __dev_get_by_index(net, t->tcm_ifindex);
+ if (!dev)
+ return -ENODEV;
- if (!cops->tcf_block)
- return -EOPNOTSUPP;
+ /* Find qdisc */
+ if (!parent) {
+ q = dev->qdisc;
+ parent = q->handle;
+ } else {
+ q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
+ if (!q) {
+ NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+ return -EINVAL;
+ }
+ }
- /* Do we search for filter, attached to class? */
- if (TC_H_MIN(parent)) {
- cl = cops->find(q, parent);
- if (cl == 0)
- return -ENOENT;
- }
+ /* Is it classful? */
+ cops = q->ops->cl_ops;
+ if (!cops) {
+ NL_SET_ERR_MSG(extack, "Qdisc not classful");
+ return -EINVAL;
+ }
- /* And the last stroke */
- block = cops->tcf_block(q, cl);
- if (!block) {
- err = -EINVAL;
- goto errout;
+ if (!cops->tcf_block) {
+ NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
+ return -EOPNOTSUPP;
+ }
+
+ /* Do we search for filter, attached to class? */
+ if (TC_H_MIN(parent)) {
+ cl = cops->find(q, parent);
+ if (cl == 0) {
+ NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
+ return -ENOENT;
+ }
+ }
+
+ /* And the last stroke */
+ block = cops->tcf_block(q, cl, extack);
+ if (!block) {
+ err = -EINVAL;
+ goto errout;
+ }
+ if (tcf_block_shared(block)) {
+ NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
+ err = -EOPNOTSUPP;
+ goto errout;
+ }
}
chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
if (chain_index > TC_ACT_EXT_VAL_MASK) {
+ NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
err = -EINVAL;
goto errout;
}
chain = tcf_chain_get(block, chain_index,
n->nlmsg_type == RTM_NEWTFILTER);
if (!chain) {
+ NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
goto errout;
}
if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
- tfilter_notify_chain(net, skb, q, parent, n,
+ tfilter_notify_chain(net, skb, block, q, parent, n,
chain, RTM_DELTFILTER);
tcf_chain_flush(chain);
err = 0;
@@ -827,6 +1139,7 @@ replay:
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
prio, prio_allocate);
if (IS_ERR(tp)) {
+ NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = PTR_ERR(tp);
goto errout;
}
@@ -835,12 +1148,14 @@ replay:
/* Proto-tcf does not exist, create new one */
if (tca[TCA_KIND] == NULL || !protocol) {
+ NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
err = -EINVAL;
goto errout;
}
if (n->nlmsg_type != RTM_NEWTFILTER ||
!(n->nlmsg_flags & NLM_F_CREATE)) {
+ NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
err = -ENOENT;
goto errout;
}
@@ -849,13 +1164,14 @@ replay:
prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
- protocol, prio, parent, q, chain);
+ protocol, prio, chain, extack);
if (IS_ERR(tp)) {
err = PTR_ERR(tp);
goto errout;
}
tp_created = 1;
} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+ NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
err = -EINVAL;
goto errout;
}
@@ -865,15 +1181,16 @@ replay:
if (!fh) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
tcf_chain_tp_remove(chain, &chain_info, tp);
- tfilter_notify(net, skb, n, tp, q, parent, fh,
+ tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_DELTFILTER, false);
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, extack);
err = 0;
goto errout;
}
if (n->nlmsg_type != RTM_NEWTFILTER ||
!(n->nlmsg_flags & NLM_F_CREATE)) {
+ NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
err = -ENOENT;
goto errout;
}
@@ -884,41 +1201,47 @@ replay:
case RTM_NEWTFILTER:
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, NULL);
+ NL_SET_ERR_MSG(extack, "Filter already exists");
err = -EEXIST;
goto errout;
}
break;
case RTM_DELTFILTER:
- err = tfilter_del_notify(net, skb, n, tp, q, parent,
- fh, false, &last);
+ err = tfilter_del_notify(net, skb, n, tp, block,
+ q, parent, fh, false, &last,
+ extack);
if (err)
goto errout;
if (last) {
tcf_chain_tp_remove(chain, &chain_info, tp);
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, extack);
}
goto errout;
case RTM_GETTFILTER:
- err = tfilter_notify(net, skb, n, tp, q, parent, fh,
- RTM_NEWTFILTER, true);
+ err = tfilter_notify(net, skb, n, tp, block, q, parent,
+ fh, RTM_NEWTFILTER, true);
+ if (err < 0)
+ NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
goto errout;
default:
+ NL_SET_ERR_MSG(extack, "Invalid netlink message type");
err = -EINVAL;
goto errout;
}
}
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
- n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
+ n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
+ extack);
if (err == 0) {
if (tp_created)
tcf_chain_tp_insert(chain, &chain_info, tp);
- tfilter_notify(net, skb, n, tp, q, parent, fh,
+ tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false);
} else {
if (tp_created)
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, NULL);
}
errout:
@@ -934,6 +1257,7 @@ struct tcf_dump_args {
struct tcf_walker w;
struct sk_buff *skb;
struct netlink_callback *cb;
+ struct tcf_block *block;
struct Qdisc *q;
u32 parent;
};
@@ -943,7 +1267,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
struct tcf_dump_args *a = (void *)arg;
struct net *net = sock_net(a->skb->sk);
- return tcf_fill_node(net, a->skb, tp, a->q, a->parent,
+ return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER);
@@ -954,6 +1278,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
long index_start, long *p_index)
{
struct net *net = sock_net(skb->sk);
+ struct tcf_block *block = chain->block;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
struct tcf_dump_args arg;
struct tcf_proto *tp;
@@ -972,7 +1297,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
if (cb->args[1] == 0) {
- if (tcf_fill_node(net, skb, tp, q, parent, 0,
+ if (tcf_fill_node(net, skb, tp, block, q, parent, 0,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
@@ -985,6 +1310,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
arg.w.fn = tcf_node_dump;
arg.skb = skb;
arg.cb = cb;
+ arg.block = block;
arg.q = q;
arg.parent = parent;
arg.w.stop = 0;
@@ -1003,13 +1329,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
- struct net_device *dev;
- struct Qdisc *q;
+ struct Qdisc *q = NULL;
struct tcf_block *block;
struct tcf_chain *chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
- unsigned long cl = 0;
- const struct Qdisc_class_ops *cops;
long index_start;
long index;
u32 parent;
@@ -1022,32 +1345,51 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
- dev = __dev_get_by_index(net, tcm->tcm_ifindex);
- if (!dev)
- return skb->len;
-
- parent = tcm->tcm_parent;
- if (!parent) {
- q = dev->qdisc;
- parent = q->handle;
+ if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+ block = tcf_block_lookup(net, tcm->tcm_block_index);
+ if (!block)
+ goto out;
+ /* If we work with block index, q is NULL and parent value
+ * will never be used in the following code. The check
+ * in tcf_fill_node prevents it. However, compiler does not
+ * see that far, so set parent to zero to silence the warning
+ * about parent being uninitialized.
+ */
+ parent = 0;
} else {
- q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
- }
- if (!q)
- goto out;
- cops = q->ops->cl_ops;
- if (!cops)
- goto out;
- if (!cops->tcf_block)
- goto out;
- if (TC_H_MIN(tcm->tcm_parent)) {
- cl = cops->find(q, tcm->tcm_parent);
- if (cl == 0)
+ const struct Qdisc_class_ops *cops;
+ struct net_device *dev;
+ unsigned long cl = 0;
+
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
+ return skb->len;
+
+ parent = tcm->tcm_parent;
+ if (!parent) {
+ q = dev->qdisc;
+ parent = q->handle;
+ } else {
+ q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+ }
+ if (!q)
+ goto out;
+ cops = q->ops->cl_ops;
+ if (!cops)
+ goto out;
+ if (!cops->tcf_block)
+ goto out;
+ if (TC_H_MIN(tcm->tcm_parent)) {
+ cl = cops->find(q, tcm->tcm_parent);
+ if (cl == 0)
+ goto out;
+ }
+ block = cops->tcf_block(q, cl, NULL);
+ if (!block)
goto out;
+ if (tcf_block_shared(block))
+ q = NULL;
}
- block = cops->tcf_block(q, cl);
- if (!block)
- goto out;
index_start = cb->args[0];
index = 0;
@@ -1082,7 +1424,8 @@ void tcf_exts_destroy(struct tcf_exts *exts)
EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
- struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
+ struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
+ struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
{
@@ -1115,8 +1458,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
}
#else
if ((exts->action && tb[exts->action]) ||
- (exts->police && tb[exts->police]))
+ (exts->police && tb[exts->police])) {
+ NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
return -EOPNOTSUPP;
+ }
#endif
return 0;
@@ -1250,18 +1595,50 @@ int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
}
EXPORT_SYMBOL(tc_setup_cb_call);
+static __net_init int tcf_net_init(struct net *net)
+{
+ struct tcf_net *tn = net_generic(net, tcf_net_id);
+
+ idr_init(&tn->idr);
+ return 0;
+}
+
+static void __net_exit tcf_net_exit(struct net *net)
+{
+ struct tcf_net *tn = net_generic(net, tcf_net_id);
+
+ idr_destroy(&tn->idr);
+}
+
+static struct pernet_operations tcf_net_ops = {
+ .init = tcf_net_init,
+ .exit = tcf_net_exit,
+ .id = &tcf_net_id,
+ .size = sizeof(struct tcf_net),
+};
+
static int __init tc_filter_init(void)
{
+ int err;
+
tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
if (!tc_filter_wq)
return -ENOMEM;
+ err = register_pernet_subsys(&tcf_net_ops);
+ if (err)
+ goto err_register_pernet_subsys;
+
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
tc_dump_tfilter, 0);
return 0;
+
+err_register_pernet_subsys:
+ destroy_workqueue(tc_filter_wq);
+ return err;
}
subsys_initcall(tc_filter_init);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 5f169ded347e..d333f5c5101d 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -112,7 +112,7 @@ static void basic_delete_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void basic_destroy(struct tcf_proto *tp)
+static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
@@ -130,7 +130,8 @@ static void basic_destroy(struct tcf_proto *tp)
kfree_rcu(head, rcu);
}
-static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int basic_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f = arg;
@@ -152,11 +153,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct basic_filter *f, unsigned long base,
struct nlattr **tb,
- struct nlattr *est, bool ovr)
+ struct nlattr *est, bool ovr,
+ struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
if (err < 0)
return err;
@@ -175,7 +177,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
static int basic_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr,
+ struct netlink_ext_ack *extack)
{
int err;
struct basic_head *head = rtnl_dereference(tp->root);
@@ -221,7 +224,8 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
fnew->handle = idr_index;
}
- err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr);
+ err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
+ extack);
if (err < 0) {
if (!fold)
idr_remove_ext(&head->handle_idr, fnew->handle);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index a9f3e317055c..8e5326bc6440 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
struct list_head link;
struct tcf_result res;
bool exts_integrated;
- bool offloaded;
u32 gen_flags;
struct tcf_exts exts;
u32 handle;
@@ -148,99 +147,95 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
}
static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
- enum tc_clsbpf_command cmd)
+ struct cls_bpf_prog *oldprog,
+ struct netlink_ext_ack *extack)
{
- bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
struct tcf_block *block = tp->chain->block;
- bool skip_sw = tc_skip_sw(prog->gen_flags);
struct tc_cls_bpf_offload cls_bpf = {};
+ struct cls_bpf_prog *obj;
+ bool skip_sw;
int err;
- tc_cls_common_offload_init(&cls_bpf.common, tp);
- cls_bpf.command = cmd;
- cls_bpf.exts = &prog->exts;
- cls_bpf.prog = prog->filter;
- cls_bpf.name = prog->bpf_name;
- cls_bpf.exts_integrated = prog->exts_integrated;
- cls_bpf.gen_flags = prog->gen_flags;
+ skip_sw = prog && tc_skip_sw(prog->gen_flags);
+ obj = prog ?: oldprog;
+
+ tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags,
+ extack);
+ cls_bpf.command = TC_CLSBPF_OFFLOAD;
+ cls_bpf.exts = &obj->exts;
+ cls_bpf.prog = prog ? prog->filter : NULL;
+ cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
+ cls_bpf.name = obj->bpf_name;
+ cls_bpf.exts_integrated = obj->exts_integrated;
+
+ if (oldprog)
+ tcf_block_offload_dec(block, &oldprog->gen_flags);
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
- if (addorrep) {
+ if (prog) {
if (err < 0) {
- cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+ cls_bpf_offload_cmd(tp, oldprog, prog, extack);
return err;
} else if (err > 0) {
- prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
+ tcf_block_offload_inc(block, &prog->gen_flags);
}
}
- if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+ if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
return -EINVAL;
return 0;
}
-static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
- struct cls_bpf_prog *oldprog)
+static u32 cls_bpf_flags(u32 flags)
{
- struct cls_bpf_prog *obj = prog;
- enum tc_clsbpf_command cmd;
- bool skip_sw;
- int ret;
-
- skip_sw = tc_skip_sw(prog->gen_flags) ||
- (oldprog && tc_skip_sw(oldprog->gen_flags));
-
- if (oldprog && oldprog->offloaded) {
- if (!tc_skip_hw(prog->gen_flags)) {
- cmd = TC_CLSBPF_REPLACE;
- } else if (!tc_skip_sw(prog->gen_flags)) {
- obj = oldprog;
- cmd = TC_CLSBPF_DESTROY;
- } else {
- return -EINVAL;
- }
- } else {
- if (tc_skip_hw(prog->gen_flags))
- return skip_sw ? -EINVAL : 0;
- cmd = TC_CLSBPF_ADD;
- }
+ return flags & CLS_BPF_SUPPORTED_GEN_FLAGS;
+}
- ret = cls_bpf_offload_cmd(tp, obj, cmd);
- if (ret)
- return ret;
+static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+ struct cls_bpf_prog *oldprog,
+ struct netlink_ext_ack *extack)
+{
+ if (prog && oldprog &&
+ cls_bpf_flags(prog->gen_flags) !=
+ cls_bpf_flags(oldprog->gen_flags))
+ return -EINVAL;
- obj->offloaded = true;
- if (oldprog)
- oldprog->offloaded = false;
+ if (prog && tc_skip_hw(prog->gen_flags))
+ prog = NULL;
+ if (oldprog && tc_skip_hw(oldprog->gen_flags))
+ oldprog = NULL;
+ if (!prog && !oldprog)
+ return 0;
- return 0;
+ return cls_bpf_offload_cmd(tp, prog, oldprog, extack);
}
static void cls_bpf_stop_offload(struct tcf_proto *tp,
- struct cls_bpf_prog *prog)
+ struct cls_bpf_prog *prog,
+ struct netlink_ext_ack *extack)
{
int err;
- if (!prog->offloaded)
- return;
-
- err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
- if (err) {
+ err = cls_bpf_offload_cmd(tp, NULL, prog, extack);
+ if (err)
pr_err("Stopping hardware offload failed: %d\n", err);
- return;
- }
-
- prog->offloaded = false;
}
static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
struct cls_bpf_prog *prog)
{
- if (!prog->offloaded)
- return;
+ struct tcf_block *block = tp->chain->block;
+ struct tc_cls_bpf_offload cls_bpf = {};
- cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+ tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags, NULL);
+ cls_bpf.command = TC_CLSBPF_STATS;
+ cls_bpf.exts = &prog->exts;
+ cls_bpf.prog = prog->filter;
+ cls_bpf.name = prog->bpf_name;
+ cls_bpf.exts_integrated = prog->exts_integrated;
+
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
}
static int cls_bpf_init(struct tcf_proto *tp)
@@ -258,11 +253,8 @@ static int cls_bpf_init(struct tcf_proto *tp)
return 0;
}
-static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
+static void cls_bpf_free_parms(struct cls_bpf_prog *prog)
{
- tcf_exts_destroy(&prog->exts);
- tcf_exts_put_net(&prog->exts);
-
if (cls_bpf_is_ebpf(prog))
bpf_prog_put(prog->filter);
else
@@ -270,6 +262,14 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
kfree(prog->bpf_name);
kfree(prog->bpf_ops);
+}
+
+static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
+{
+ tcf_exts_destroy(&prog->exts);
+ tcf_exts_put_net(&prog->exts);
+
+ cls_bpf_free_parms(prog);
kfree(prog);
}
@@ -290,12 +290,13 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
tcf_queue_work(&prog->work);
}
-static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
+static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
idr_remove_ext(&head->handle_idr, prog->handle);
- cls_bpf_stop_offload(tp, prog);
+ cls_bpf_stop_offload(tp, prog, extack);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
if (tcf_exts_get_net(&prog->exts))
@@ -304,22 +305,24 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
__cls_bpf_delete_prog(prog);
}
-static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
- __cls_bpf_delete(tp, arg);
+ __cls_bpf_delete(tp, arg, extack);
*last = list_empty(&head->plist);
return 0;
}
-static void cls_bpf_destroy(struct tcf_proto *tp)
+static void cls_bpf_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *tmp;
list_for_each_entry_safe(prog, tmp, &head->plist, link)
- __cls_bpf_delete(tp, prog);
+ __cls_bpf_delete(tp, prog, extack);
idr_destroy(&head->handle_idr);
kfree_rcu(head, rcu);
@@ -404,15 +407,16 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
prog->bpf_name = name;
prog->filter = fp;
- if (fp->dst_needed && !(tp->q->flags & TCQ_F_INGRESS))
- netif_keep_dst(qdisc_dev(tp->q));
+ if (fp->dst_needed)
+ tcf_block_netif_keep_dst(tp->chain->block);
return 0;
}
static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_bpf_prog *prog, unsigned long base,
- struct nlattr **tb, struct nlattr *est, bool ovr)
+ struct nlattr **tb, struct nlattr *est, bool ovr,
+ struct netlink_ext_ack *extack)
{
bool is_bpf, is_ebpf, have_exts = false;
u32 gen_flags = 0;
@@ -423,7 +427,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr);
+ ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack);
if (ret < 0)
return ret;
@@ -461,7 +465,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr)
+ void **arg, bool ovr, struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *oldprog = *arg;
@@ -509,17 +513,14 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
prog->handle = handle;
}
- ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
+ ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr,
+ extack);
if (ret < 0)
goto errout_idr;
- ret = cls_bpf_offload(tp, prog, oldprog);
- if (ret) {
- if (!oldprog)
- idr_remove_ext(&head->handle_idr, prog->handle);
- __cls_bpf_delete_prog(prog);
- return ret;
- }
+ ret = cls_bpf_offload(tp, prog, oldprog, extack);
+ if (ret)
+ goto errout_parms;
if (!tc_in_hw(prog->gen_flags))
prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
@@ -537,6 +538,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
*arg = prog;
return 0;
+errout_parms:
+ cls_bpf_free_parms(prog);
errout_idr:
if (!oldprog)
idr_remove_ext(&head->handle_idr, prog->handle);
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 309d5899265f..762da5c0cf5e 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -91,7 +91,8 @@ static void cls_cgroup_destroy_rcu(struct rcu_head *root)
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr)
+ void **arg, bool ovr,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
@@ -121,7 +122,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
+ extack);
if (err < 0)
goto errout;
@@ -141,7 +143,8 @@ errout:
return err;
}
-static void cls_cgroup_destroy(struct tcf_proto *tp)
+static void cls_cgroup_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
@@ -154,7 +157,8 @@ static void cls_cgroup_destroy(struct tcf_proto *tp)
}
}
-static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 25c2a888e1f0..cd5fe383afdd 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -401,7 +401,7 @@ static void flow_destroy_filter(struct rcu_head *head)
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr)
+ void **arg, bool ovr, struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *fold, *fnew;
@@ -454,7 +454,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto err2;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
+ extack);
if (err < 0)
goto err2;
@@ -526,7 +527,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE);
- netif_keep_dst(qdisc_dev(tp->q));
+ tcf_block_netif_keep_dst(tp->chain->block);
if (tb[TCA_FLOW_KEYS]) {
fnew->keymask = keymask;
@@ -574,7 +575,8 @@ err1:
return err;
}
-static int flow_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f = arg;
@@ -598,7 +600,7 @@ static int flow_init(struct tcf_proto *tp)
return 0;
}
-static void flow_destroy(struct tcf_proto *tp)
+static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 543a3e875d05..dc9acaafc0a8 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -166,6 +166,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
* so do it rather here.
*/
skb_key.basic.n_proto = skb->protocol;
+ skb_flow_dissect_tunnel_info(skb, &head->dissector, &skb_key);
skb_flow_dissect(skb, &head->dissector, &skb_key, 0);
fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
@@ -217,30 +218,33 @@ static void fl_destroy_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
- tc_cls_common_offload_init(&cls_flower.common, tp);
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long) f;
tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
&cls_flower, false);
+ tcf_block_offload_dec(block, &f->flags);
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
struct flow_dissector *dissector,
struct fl_flow_key *mask,
- struct cls_fl_filter *f)
+ struct cls_fl_filter *f,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
bool skip_sw = tc_skip_sw(f->flags);
int err;
- tc_cls_common_offload_init(&cls_flower.common, tp);
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_REPLACE;
cls_flower.cookie = (unsigned long) f;
cls_flower.dissector = dissector;
@@ -252,10 +256,10 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
&cls_flower, skip_sw);
if (err < 0) {
- fl_hw_destroy_filter(tp, f);
+ fl_hw_destroy_filter(tp, f, NULL);
return err;
} else if (err > 0) {
- f->flags |= TCA_CLS_FLAGS_IN_HW;
+ tcf_block_offload_inc(block, &f->flags);
}
if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
@@ -269,7 +273,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
- tc_cls_common_offload_init(&cls_flower.common, tp);
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
cls_flower.command = TC_CLSFLOWER_STATS;
cls_flower.cookie = (unsigned long) f;
cls_flower.exts = &f->exts;
@@ -279,14 +283,15 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
&cls_flower, false);
}
-static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
idr_remove_ext(&head->handle_idr, f->handle);
list_del_rcu(&f->list);
if (!tc_skip_hw(f->flags))
- fl_hw_destroy_filter(tp, f);
+ fl_hw_destroy_filter(tp, f, extack);
tcf_unbind_filter(tp, &f->res);
if (tcf_exts_get_net(&f->exts))
call_rcu(&f->rcu, fl_destroy_filter);
@@ -312,13 +317,13 @@ static void fl_destroy_rcu(struct rcu_head *rcu)
schedule_work(&head->work);
}
-static void fl_destroy(struct tcf_proto *tp)
+static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f, *next;
list_for_each_entry_safe(f, next, &head->filters, list)
- __fl_delete(tp, f);
+ __fl_delete(tp, f, extack);
idr_destroy(&head->handle_idr);
__module_get(THIS_MODULE);
@@ -524,13 +529,14 @@ static void fl_set_key_ip(struct nlattr **tb,
}
static int fl_set_key(struct net *net, struct nlattr **tb,
- struct fl_flow_key *key, struct fl_flow_key *mask)
+ struct fl_flow_key *key, struct fl_flow_key *mask,
+ struct netlink_ext_ack *extack)
{
__be16 ethertype;
int ret = 0;
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_FLOWER_INDEV]) {
- int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
+ int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV], extack);
if (err < 0)
return err;
key->indev_ifindex = err;
@@ -825,11 +831,12 @@ static int fl_check_assign_mask(struct cls_fl_head *head,
static int fl_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_fl_filter *f, struct fl_flow_mask *mask,
unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+ struct nlattr *est, bool ovr,
+ struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
if (err < 0)
return err;
@@ -838,7 +845,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
tcf_bind_filter(tp, &f->res, base);
}
- err = fl_set_key(net, tb, &f->key, &mask->key);
+ err = fl_set_key(net, tb, &f->key, &mask->key, extack);
if (err)
return err;
@@ -851,7 +858,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
static int fl_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr)
+ void **arg, bool ovr, struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *fold = *arg;
@@ -914,7 +921,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
}
- err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
+ err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr,
+ extack);
if (err)
goto errout_idr;
@@ -938,7 +946,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
err = fl_hw_replace_filter(tp,
&head->dissector,
&mask.key,
- fnew);
+ fnew,
+ extack);
if (err)
goto errout_idr;
}
@@ -951,7 +960,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
rhashtable_remove_fast(&head->ht, &fold->ht_node,
head->ht_params);
if (!tc_skip_hw(fold->flags))
- fl_hw_destroy_filter(tp, fold);
+ fl_hw_destroy_filter(tp, fold, NULL);
}
*arg = fnew;
@@ -981,7 +990,8 @@ errout_tb:
return err;
}
-static int fl_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = arg;
@@ -989,7 +999,7 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last)
if (!tc_skip_sw(f->flags))
rhashtable_remove_fast(&head->ht, &f->ht_node,
head->ht_params);
- __fl_delete(tp, f);
+ __fl_delete(tp, f, extack);
*last = list_empty(&head->filters);
return 0;
}
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 20f0de1a960a..8b207723fbc2 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -149,7 +149,7 @@ static void fw_delete_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void fw_destroy(struct tcf_proto *tp)
+static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
@@ -172,7 +172,8 @@ static void fw_destroy(struct tcf_proto *tp)
kfree_rcu(head, rcu);
}
-static int fw_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int fw_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = arg;
@@ -218,13 +219,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
static int fw_set_parms(struct net *net, struct tcf_proto *tp,
struct fw_filter *f, struct nlattr **tb,
- struct nlattr **tca, unsigned long base, bool ovr)
+ struct nlattr **tca, unsigned long base, bool ovr,
+ struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
u32 mask;
int err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
+ extack);
if (err < 0)
return err;
@@ -236,7 +239,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_FW_INDEV]) {
int ret;
- ret = tcf_change_indev(net, tb[TCA_FW_INDEV]);
+ ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack);
if (ret < 0)
return ret;
f->ifindex = ret;
@@ -257,7 +260,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca, void **arg,
- bool ovr)
+ bool ovr, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = *arg;
@@ -296,7 +299,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return err;
}
- err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr);
+ err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack);
if (err < 0) {
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
@@ -345,7 +348,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
f->id = handle;
f->tp = tp;
- err = fw_set_parms(net, tp, f, tb, tca, base, ovr);
+ err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack);
if (err < 0)
goto errout;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 66d4e0099158..2ba721a590a7 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -71,28 +71,31 @@ static void mall_destroy_rcu(struct rcu_head *rcu)
static void mall_destroy_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
- unsigned long cookie)
+ unsigned long cookie,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_matchall_offload cls_mall = {};
struct tcf_block *block = tp->chain->block;
- tc_cls_common_offload_init(&cls_mall.common, tp);
+ tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
cls_mall.command = TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = cookie;
tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false);
+ tcf_block_offload_dec(block, &head->flags);
}
static int mall_replace_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
- unsigned long cookie)
+ unsigned long cookie,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_matchall_offload cls_mall = {};
struct tcf_block *block = tp->chain->block;
bool skip_sw = tc_skip_sw(head->flags);
int err;
- tc_cls_common_offload_init(&cls_mall.common, tp);
+ tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
cls_mall.command = TC_CLSMATCHALL_REPLACE;
cls_mall.exts = &head->exts;
cls_mall.cookie = cookie;
@@ -100,10 +103,10 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
&cls_mall, skip_sw);
if (err < 0) {
- mall_destroy_hw_filter(tp, head, cookie);
+ mall_destroy_hw_filter(tp, head, cookie, NULL);
return err;
} else if (err > 0) {
- head->flags |= TCA_CLS_FLAGS_IN_HW;
+ tcf_block_offload_inc(block, &head->flags);
}
if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
@@ -112,7 +115,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
return 0;
}
-static void mall_destroy(struct tcf_proto *tp)
+static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
@@ -120,7 +123,7 @@ static void mall_destroy(struct tcf_proto *tp)
return;
if (!tc_skip_hw(head->flags))
- mall_destroy_hw_filter(tp, head, (unsigned long) head);
+ mall_destroy_hw_filter(tp, head, (unsigned long) head, extack);
if (tcf_exts_get_net(&head->exts))
call_rcu(&head->rcu, mall_destroy_rcu);
@@ -141,11 +144,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+ struct nlattr *est, bool ovr,
+ struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack);
if (err < 0)
return err;
@@ -159,7 +163,7 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr)
+ void **arg, bool ovr, struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
@@ -197,12 +201,14 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
new->handle = handle;
new->flags = flags;
- err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr);
+ err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
+ extack);
if (err)
goto err_set_parms;
if (!tc_skip_hw(new->flags)) {
- err = mall_replace_hw_filter(tp, new, (unsigned long) new);
+ err = mall_replace_hw_filter(tp, new, (unsigned long)new,
+ extack);
if (err)
goto err_replace_hw_filter;
}
@@ -222,7 +228,8 @@ err_exts_init:
return err;
}
-static int mall_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int mall_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index ac9a5b8825b9..21a03a8ee029 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -281,7 +281,7 @@ static void route4_delete_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void route4_destroy(struct tcf_proto *tp)
+static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
@@ -316,7 +316,8 @@ static void route4_destroy(struct tcf_proto *tp)
kfree_rcu(head, rcu);
}
-static int route4_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int route4_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter *f = arg;
@@ -389,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct route4_filter *f,
u32 handle, struct route4_head *head,
struct nlattr **tb, struct nlattr *est, int new,
- bool ovr)
+ bool ovr, struct netlink_ext_ack *extack)
{
u32 id = 0, to = 0, nhandle = 0x8000;
struct route4_filter *fp;
@@ -397,7 +398,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct route4_bucket *b;
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
if (err < 0)
return err;
@@ -471,7 +472,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr,
+ struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
@@ -515,7 +517,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
}
err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], new, ovr);
+ tca[TCA_RATE], new, ovr, extack);
if (err < 0)
goto errout;
@@ -527,7 +529,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (f->handle < f1->handle)
break;
- netif_keep_dst(qdisc_dev(tp->q));
+ tcf_block_netif_keep_dst(tp->chain->block);
rcu_assign_pointer(f->next, f1);
rcu_assign_pointer(*fp, f);
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index cf325625c99d..4f1297657c27 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -322,7 +322,7 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
__rsvp_delete_filter(f);
}
-static void rsvp_destroy(struct tcf_proto *tp)
+static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
@@ -350,7 +350,8 @@ static void rsvp_destroy(struct tcf_proto *tp)
kfree_rcu(data, rcu);
}
-static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
struct rsvp_filter *nfp, *f = arg;
@@ -486,7 +487,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- void **arg, bool ovr)
+ void **arg, bool ovr, struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
struct rsvp_filter *f, *nfp;
@@ -511,7 +512,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack);
if (err < 0)
goto errout2;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 67467ae24c97..b49cc990a000 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -193,7 +193,8 @@ static void tcindex_destroy_fexts(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = arg;
@@ -246,7 +247,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp,
{
bool last;
- return tcindex_delete(tp, arg, &last);
+ return tcindex_delete(tp, arg, &last, NULL);
}
static void __tcindex_destroy(struct rcu_head *head)
@@ -322,7 +323,7 @@ static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
struct tcindex_filter_result *r, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+ struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
{
struct tcindex_filter_result new_filter_result, *old_r = r;
struct tcindex_filter_result cr;
@@ -334,7 +335,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack);
if (err < 0)
goto errout;
@@ -520,7 +521,8 @@ errout:
static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr,
+ struct netlink_ext_ack *extack)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -540,7 +542,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
return err;
return tcindex_set_parms(net, tp, base, handle, p, r, tb,
- tca[TCA_RATE], ovr);
+ tca[TCA_RATE], ovr, extack);
}
static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
@@ -579,7 +581,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-static void tcindex_destroy(struct tcf_proto *tp)
+static void tcindex_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcf_walker walker;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ac152b4f4247..e3c5e390ec23 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -45,7 +45,6 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
-#include <linux/netdevice.h>
#include <linux/idr.h>
struct tc_u_knode {
@@ -88,6 +87,7 @@ struct tc_u_hnode {
unsigned int divisor;
struct idr handle_idr;
struct rcu_head rcu;
+ u32 flags;
/* The 'ht' field MUST be the last field in structure to allow for
* more entries allocated at end of structure.
*/
@@ -487,12 +487,13 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
return 0;
}
-static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
+static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
+ struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
- tc_cls_common_offload_init(&cls_u32.common, tp);
+ tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack);
cls_u32.command = TC_CLSU32_DELETE_HNODE;
cls_u32.hnode.divisor = h->divisor;
cls_u32.hnode.handle = h->handle;
@@ -502,7 +503,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
}
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
- u32 flags)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
@@ -510,7 +511,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
bool offloaded = false;
int err;
- tc_cls_common_offload_init(&cls_u32.common, tp);
+ tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
cls_u32.command = TC_CLSU32_NEW_HNODE;
cls_u32.hnode.divisor = h->divisor;
cls_u32.hnode.handle = h->handle;
@@ -518,7 +519,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
- u32_clear_hw_hnode(tp, h);
+ u32_clear_hw_hnode(tp, h, NULL);
return err;
} else if (err > 0) {
offloaded = true;
@@ -530,27 +531,30 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
return 0;
}
-static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
+static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
+ struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
- tc_cls_common_offload_init(&cls_u32.common, tp);
+ tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
cls_u32.command = TC_CLSU32_DELETE_KNODE;
- cls_u32.knode.handle = handle;
+ cls_u32.knode.handle = n->handle;
tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
+ tcf_block_offload_dec(block, &n->flags);
}
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
- u32 flags)
+ u32 flags, struct netlink_ext_ack *extack)
{
+ struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
bool skip_sw = tc_skip_sw(flags);
int err;
- tc_cls_common_offload_init(&cls_u32.common, tp);
+ tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
cls_u32.command = TC_CLSU32_REPLACE_KNODE;
cls_u32.knode.handle = n->handle;
cls_u32.knode.fshift = n->fshift;
@@ -564,14 +568,14 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
cls_u32.knode.sel = &n->sel;
cls_u32.knode.exts = &n->exts;
if (n->ht_down)
- cls_u32.knode.link_handle = n->ht_down->handle;
+ cls_u32.knode.link_handle = ht->handle;
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
- u32_remove_hw_knode(tp, n->handle);
+ u32_remove_hw_knode(tp, n, NULL);
return err;
} else if (err > 0) {
- n->flags |= TCA_CLS_FLAGS_IN_HW;
+ tcf_block_offload_inc(block, &n->flags);
}
if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
@@ -580,7 +584,8 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
return 0;
}
-static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
+ struct netlink_ext_ack *extack)
{
struct tc_u_knode *n;
unsigned int h;
@@ -590,7 +595,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
RCU_INIT_POINTER(ht->ht[h],
rtnl_dereference(n->next));
tcf_unbind_filter(tp, &n->res);
- u32_remove_hw_knode(tp, n->handle);
+ u32_remove_hw_knode(tp, n, extack);
idr_remove_ext(&ht->handle_idr, n->handle);
if (tcf_exts_get_net(&n->exts))
call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
@@ -600,7 +605,8 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
}
}
-static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
+ struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode __rcu **hn;
@@ -608,14 +614,14 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
WARN_ON(ht->refcnt);
- u32_clear_hnode(tp, ht);
+ u32_clear_hnode(tp, ht, extack);
hn = &tp_c->hlist;
for (phn = rtnl_dereference(*hn);
phn;
hn = &phn->next, phn = rtnl_dereference(*hn)) {
if (phn == ht) {
- u32_clear_hw_hnode(tp, ht);
+ u32_clear_hw_hnode(tp, ht, extack);
idr_destroy(&ht->handle_idr);
idr_remove_ext(&tp_c->handle_idr, ht->handle);
RCU_INIT_POINTER(*hn, ht->next);
@@ -638,7 +644,7 @@ static bool ht_empty(struct tc_u_hnode *ht)
return true;
}
-static void u32_destroy(struct tcf_proto *tp)
+static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -646,7 +652,7 @@ static void u32_destroy(struct tcf_proto *tp)
WARN_ON(root_ht == NULL);
if (root_ht && --root_ht->refcnt == 0)
- u32_destroy_hnode(tp, root_ht);
+ u32_destroy_hnode(tp, root_ht, extack);
if (--tp_c->refcnt == 0) {
struct tc_u_hnode *ht;
@@ -657,7 +663,7 @@ static void u32_destroy(struct tcf_proto *tp)
ht;
ht = rtnl_dereference(ht->next)) {
ht->refcnt--;
- u32_clear_hnode(tp, ht);
+ u32_clear_hnode(tp, ht, extack);
}
while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
@@ -672,7 +678,8 @@ static void u32_destroy(struct tcf_proto *tp)
tp->data = NULL;
}
-static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct tc_u_hnode *ht = arg;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -683,18 +690,21 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
goto out;
if (TC_U32_KEY(ht->handle)) {
- u32_remove_hw_knode(tp, ht->handle);
+ u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack);
ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
goto out;
}
- if (root_ht == ht)
+ if (root_ht == ht) {
+ NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node");
return -EINVAL;
+ }
if (ht->refcnt == 1) {
ht->refcnt--;
- u32_destroy_hnode(tp, ht);
+ u32_destroy_hnode(tp, ht, extack);
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
return -EBUSY;
}
@@ -765,11 +775,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct tc_u_hnode *ht,
struct tc_u_knode *n, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+ struct nlattr *est, bool ovr,
+ struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);
if (err < 0)
return err;
@@ -777,14 +788,18 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
struct tc_u_hnode *ht_down = NULL, *ht_old;
- if (TC_U32_KEY(handle))
+ if (TC_U32_KEY(handle)) {
+ NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table");
return -EINVAL;
+ }
if (handle) {
ht_down = u32_lookup_ht(ht->tp_c, handle);
- if (ht_down == NULL)
+ if (!ht_down) {
+ NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
return -EINVAL;
+ }
ht_down->refcnt++;
}
@@ -802,7 +817,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_U32_INDEV]) {
int ret;
- ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
+ ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
if (ret < 0)
return -EINVAL;
n->ifindex = ret;
@@ -841,8 +856,9 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
struct tc_u_knode *n)
{
- struct tc_u_knode *new;
+ struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
struct tc_u32_sel *s = &n->sel;
+ struct tc_u_knode *new;
new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
GFP_KERNEL);
@@ -860,11 +876,11 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
new->fshift = n->fshift;
new->res = n->res;
new->flags = n->flags;
- RCU_INIT_POINTER(new->ht_down, n->ht_down);
+ RCU_INIT_POINTER(new->ht_down, ht);
/* bump reference count as long as we hold pointer to structure */
- if (new->ht_down)
- new->ht_down->refcnt++;
+ if (ht)
+ ht->refcnt++;
#ifdef CONFIG_CLS_U32_PERF
/* Statistics may be incremented by readers during update
@@ -893,7 +909,8 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr,
+ struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *ht;
@@ -907,28 +924,40 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
size_t size;
#endif
- if (opt == NULL)
- return handle ? -EINVAL : 0;
+ if (!opt) {
+ if (handle) {
+ NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options");
+ return -EINVAL;
+ } else {
+ return 0;
+ }
+ }
- err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL);
+ err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack);
if (err < 0)
return err;
if (tb[TCA_U32_FLAGS]) {
flags = nla_get_u32(tb[TCA_U32_FLAGS]);
- if (!tc_flags_valid(flags))
+ if (!tc_flags_valid(flags)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
return -EINVAL;
+ }
}
n = *arg;
if (n) {
struct tc_u_knode *new;
- if (TC_U32_KEY(n->handle) == 0)
+ if (TC_U32_KEY(n->handle) == 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero");
return -EINVAL;
+ }
- if (n->flags != flags)
+ if (n->flags != flags) {
+ NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
return -EINVAL;
+ }
new = u32_init_knode(tp, n);
if (!new)
@@ -936,14 +965,14 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
err = u32_set_parms(net, tp, base,
rtnl_dereference(n->ht_up), new, tb,
- tca[TCA_RATE], ovr);
+ tca[TCA_RATE], ovr, extack);
if (err) {
u32_destroy_key(tp, new, false);
return err;
}
- err = u32_replace_hw_knode(tp, new, flags);
+ err = u32_replace_hw_knode(tp, new, flags, extack);
if (err) {
u32_destroy_key(tp, new, false);
return err;
@@ -962,10 +991,14 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (tb[TCA_U32_DIVISOR]) {
unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
- if (--divisor > 0x100)
+ if (--divisor > 0x100) {
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets");
return -EINVAL;
- if (TC_U32_KEY(handle))
+ }
+ if (TC_U32_KEY(handle)) {
+ NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table");
return -EINVAL;
+ }
ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
if (ht == NULL)
return -ENOBUFS;
@@ -989,8 +1022,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht->handle = handle;
ht->prio = tp->prio;
idr_init(&ht->handle_idr);
+ ht->flags = flags;
- err = u32_replace_hw_hnode(tp, ht, flags);
+ err = u32_replace_hw_hnode(tp, ht, flags, extack);
if (err) {
idr_remove_ext(&tp_c->handle_idr, handle);
kfree(ht);
@@ -1011,20 +1045,26 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
htid = ht->handle;
} else {
ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
- if (ht == NULL)
+ if (!ht) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found");
return -EINVAL;
+ }
}
} else {
ht = rtnl_dereference(tp->root);
htid = ht->handle;
}
- if (ht->divisor < TC_U32_HASH(htid))
+ if (ht->divisor < TC_U32_HASH(htid)) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value");
return -EINVAL;
+ }
if (handle) {
- if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
+ if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
+ NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
return -EINVAL;
+ }
handle = htid | TC_U32_NODE(handle);
err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
handle, handle + 1,
@@ -1035,6 +1075,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
handle = gen_new_kid(ht, htid);
if (tb[TCA_U32_SEL] == NULL) {
+ NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
err = -EINVAL;
goto erridr;
}
@@ -1083,12 +1124,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
#endif
- err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
+ err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr,
+ extack);
if (err == 0) {
struct tc_u_knode __rcu **ins;
struct tc_u_knode *pins;
- err = u32_replace_hw_knode(tp, n, flags);
+ err = u32_replace_hw_knode(tp, n, flags, extack);
if (err)
goto errhw;
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index df3110d69585..07c10bac06a0 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -51,7 +51,7 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em,
if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len))
return 0;
- return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len);
+ return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len);
}
static struct tcf_ematch_ops em_nbyte_ops = {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b6c4f536876b..d512f49ee83c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -393,13 +393,16 @@ static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
static struct qdisc_rate_table *qdisc_rtab_list;
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
- struct nlattr *tab)
+ struct nlattr *tab,
+ struct netlink_ext_ack *extack)
{
struct qdisc_rate_table *rtab;
if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
- nla_len(tab) != TC_RTAB_SIZE)
+ nla_len(tab) != TC_RTAB_SIZE) {
+ NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
return NULL;
+ }
for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
@@ -418,6 +421,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
r->linklayer = __detect_linklayer(r, rtab->data);
rtab->next = qdisc_rtab_list;
qdisc_rtab_list = rtab;
+ } else {
+ NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
}
return rtab;
}
@@ -449,7 +454,8 @@ static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
[TCA_STAB_DATA] = { .type = NLA_BINARY },
};
-static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
+static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_STAB_MAX + 1];
struct qdisc_size_table *stab;
@@ -458,23 +464,29 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
u16 *tab = NULL;
int err;
- err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
+ err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
if (err < 0)
return ERR_PTR(err);
- if (!tb[TCA_STAB_BASE])
+ if (!tb[TCA_STAB_BASE]) {
+ NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
return ERR_PTR(-EINVAL);
+ }
s = nla_data(tb[TCA_STAB_BASE]);
if (s->tsize > 0) {
- if (!tb[TCA_STAB_DATA])
+ if (!tb[TCA_STAB_DATA]) {
+ NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
return ERR_PTR(-EINVAL);
+ }
tab = nla_data(tb[TCA_STAB_DATA]);
tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
}
- if (tsize != s->tsize || (!tab && tsize > 0))
+ if (tsize != s->tsize || (!tab && tsize > 0)) {
+ NL_SET_ERR_MSG(extack, "Invalid size of size table");
return ERR_PTR(-EINVAL);
+ }
list_for_each_entry(stab, &qdisc_stab_list, list) {
if (memcmp(&stab->szopts, s, sizeof(*s)))
@@ -669,7 +681,7 @@ int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
unsigned int size = 4;
clhash->hash = qdisc_class_hash_alloc(size);
- if (clhash->hash == NULL)
+ if (!clhash->hash)
return -ENOMEM;
clhash->hashsize = size;
clhash->hashmask = size - 1;
@@ -779,6 +791,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
unsigned char *b = skb_tail_pointer(skb);
struct gnet_dump d;
struct qdisc_size_table *stab;
+ u32 block_index;
__u32 qlen;
cond_resched();
@@ -795,9 +808,23 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
tcm->tcm_info = refcount_read(&q->refcnt);
if (nla_put_string(skb, TCA_KIND, q->ops->id))
goto nla_put_failure;
+ if (q->ops->ingress_block_get) {
+ block_index = q->ops->ingress_block_get(q);
+ if (block_index &&
+ nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
+ goto nla_put_failure;
+ }
+ if (q->ops->egress_block_get) {
+ block_index = q->ops->egress_block_get(q);
+ if (block_index &&
+ nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
+ goto nla_put_failure;
+ }
if (q->ops->dump && q->ops->dump(q, skb) < 0)
goto nla_put_failure;
- qlen = q->q.qlen;
+ if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
+ goto nla_put_failure;
+ qlen = qdisc_qlen_sum(q);
stab = rtnl_dereference(q->stab);
if (stab && qdisc_dump_stab(skb, stab) < 0)
@@ -896,7 +923,8 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb,
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
- struct Qdisc *new, struct Qdisc *old)
+ struct Qdisc *new, struct Qdisc *old,
+ struct netlink_ext_ack *extack)
{
struct Qdisc *q = old;
struct net *net = dev_net(dev);
@@ -911,8 +939,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
(new && new->flags & TCQ_F_INGRESS)) {
num_q = 1;
ingress = 1;
- if (!dev_ingress_queue(dev))
+ if (!dev_ingress_queue(dev)) {
+ NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
return -ENOENT;
+ }
}
if (dev->flags & IFF_UP)
@@ -954,14 +984,22 @@ skip:
} else {
const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
+ /* Only support running class lockless if parent is lockless */
+ if (new && (new->flags & TCQ_F_NOLOCK) &&
+ parent && !(parent->flags & TCQ_F_NOLOCK))
+ new->flags &= ~TCQ_F_NOLOCK;
+
err = -EOPNOTSUPP;
if (cops && cops->graft) {
unsigned long cl = cops->find(parent, classid);
- if (cl)
- err = cops->graft(parent, cl, new, &old);
- else
+ if (cl) {
+ err = cops->graft(parent, cl, new, &old,
+ extack);
+ } else {
+ NL_SET_ERR_MSG(extack, "Specified class not found");
err = -ENOENT;
+ }
}
if (!err)
notify_and_destroy(net, skb, n, classid, old, new);
@@ -969,6 +1007,40 @@ skip:
return err;
}
+static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
+ struct netlink_ext_ack *extack)
+{
+ u32 block_index;
+
+ if (tca[TCA_INGRESS_BLOCK]) {
+ block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
+
+ if (!block_index) {
+ NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
+ return -EINVAL;
+ }
+ if (!sch->ops->ingress_block_set) {
+ NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
+ return -EOPNOTSUPP;
+ }
+ sch->ops->ingress_block_set(sch, block_index);
+ }
+ if (tca[TCA_EGRESS_BLOCK]) {
+ block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
+
+ if (!block_index) {
+ NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
+ return -EINVAL;
+ }
+ if (!sch->ops->egress_block_set) {
+ NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
+ return -EOPNOTSUPP;
+ }
+ sch->ops->egress_block_set(sch, block_index);
+ }
+ return 0;
+}
+
/* lockdep annotation is needed for ingress; egress gets it only for name */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;
@@ -982,7 +1054,8 @@ static struct lock_class_key qdisc_rx_lock;
static struct Qdisc *qdisc_create(struct net_device *dev,
struct netdev_queue *dev_queue,
struct Qdisc *p, u32 parent, u32 handle,
- struct nlattr **tca, int *errp)
+ struct nlattr **tca, int *errp,
+ struct netlink_ext_ack *extack)
{
int err;
struct nlattr *kind = tca[TCA_KIND];
@@ -1020,10 +1093,12 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
#endif
err = -ENOENT;
- if (ops == NULL)
+ if (!ops) {
+ NL_SET_ERR_MSG(extack, "Specified qdisc not found");
goto err_out;
+ }
- sch = qdisc_alloc(dev_queue, ops);
+ sch = qdisc_alloc(dev_queue, ops, extack);
if (IS_ERR(sch)) {
err = PTR_ERR(sch);
goto err_out2;
@@ -1060,60 +1135,63 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
}
- if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
- if (qdisc_is_percpu_stats(sch)) {
- sch->cpu_bstats =
- netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
- if (!sch->cpu_bstats)
- goto err_out4;
+ err = qdisc_block_indexes_set(sch, tca, extack);
+ if (err)
+ goto err_out3;
+
+ if (ops->init) {
+ err = ops->init(sch, tca[TCA_OPTIONS], extack);
+ if (err != 0)
+ goto err_out5;
+ }
- sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
- if (!sch->cpu_qstats)
- goto err_out4;
+ if (tca[TCA_STAB]) {
+ stab = qdisc_get_stab(tca[TCA_STAB], extack);
+ if (IS_ERR(stab)) {
+ err = PTR_ERR(stab);
+ goto err_out4;
}
+ rcu_assign_pointer(sch->stab, stab);
+ }
+ if (tca[TCA_RATE]) {
+ seqcount_t *running;
- if (tca[TCA_STAB]) {
- stab = qdisc_get_stab(tca[TCA_STAB]);
- if (IS_ERR(stab)) {
- err = PTR_ERR(stab);
- goto err_out4;
- }
- rcu_assign_pointer(sch->stab, stab);
+ err = -EOPNOTSUPP;
+ if (sch->flags & TCQ_F_MQROOT) {
+ NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
+ goto err_out4;
}
- if (tca[TCA_RATE]) {
- seqcount_t *running;
-
- err = -EOPNOTSUPP;
- if (sch->flags & TCQ_F_MQROOT)
- goto err_out4;
-
- if ((sch->parent != TC_H_ROOT) &&
- !(sch->flags & TCQ_F_INGRESS) &&
- (!p || !(p->flags & TCQ_F_MQROOT)))
- running = qdisc_root_sleeping_running(sch);
- else
- running = &sch->running;
-
- err = gen_new_estimator(&sch->bstats,
- sch->cpu_bstats,
- &sch->rate_est,
- NULL,
- running,
- tca[TCA_RATE]);
- if (err)
- goto err_out4;
+
+ if (sch->parent != TC_H_ROOT &&
+ !(sch->flags & TCQ_F_INGRESS) &&
+ (!p || !(p->flags & TCQ_F_MQROOT)))
+ running = qdisc_root_sleeping_running(sch);
+ else
+ running = &sch->running;
+
+ err = gen_new_estimator(&sch->bstats,
+ sch->cpu_bstats,
+ &sch->rate_est,
+ NULL,
+ running,
+ tca[TCA_RATE]);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
+ goto err_out4;
}
+ }
- qdisc_hash_add(sch, false);
+ qdisc_hash_add(sch, false);
- return sch;
- }
+ return sch;
+
+err_out5:
/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
if (ops->destroy)
ops->destroy(sch);
err_out3:
dev_put(dev);
- kfree((char *) sch - sch->padded);
+ qdisc_free(sch);
err_out2:
module_put(ops->owner);
err_out:
@@ -1121,8 +1199,6 @@ err_out:
return NULL;
err_out4:
- free_percpu(sch->cpu_bstats);
- free_percpu(sch->cpu_qstats);
/*
* Any broken qdiscs that would require a ops->reset() here?
* The qdisc was never in action so it shouldn't be necessary.
@@ -1133,21 +1209,28 @@ err_out4:
goto err_out3;
}
-static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
+static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
+ struct netlink_ext_ack *extack)
{
struct qdisc_size_table *ostab, *stab = NULL;
int err = 0;
if (tca[TCA_OPTIONS]) {
- if (sch->ops->change == NULL)
+ if (!sch->ops->change) {
+ NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
return -EINVAL;
- err = sch->ops->change(sch, tca[TCA_OPTIONS]);
+ }
+ if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
+ NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
+ return -EOPNOTSUPP;
+ }
+ err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
if (err)
return err;
}
if (tca[TCA_STAB]) {
- stab = qdisc_get_stab(tca[TCA_STAB]);
+ stab = qdisc_get_stab(tca[TCA_STAB], extack);
if (IS_ERR(stab))
return PTR_ERR(stab);
}
@@ -1245,8 +1328,10 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
if (clid != TC_H_ROOT) {
if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
p = qdisc_lookup(dev, TC_H_MAJ(clid));
- if (!p)
+ if (!p) {
+ NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
return -ENOENT;
+ }
q = qdisc_leaf(p, clid);
} else if (dev_ingress_queue(dev)) {
q = dev_ingress_queue(dev)->qdisc_sleeping;
@@ -1254,26 +1339,38 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
} else {
q = dev->qdisc;
}
- if (!q)
+ if (!q) {
+ NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
return -ENOENT;
+ }
- if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
+ if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
+ NL_SET_ERR_MSG(extack, "Invalid handle");
return -EINVAL;
+ }
} else {
q = qdisc_lookup(dev, tcm->tcm_handle);
- if (!q)
+ if (!q) {
+ NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
return -ENOENT;
+ }
}
- if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
+ if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
+ NL_SET_ERR_MSG(extack, "Invalid qdisc name");
return -EINVAL;
+ }
if (n->nlmsg_type == RTM_DELQDISC) {
- if (!clid)
+ if (!clid) {
+ NL_SET_ERR_MSG(extack, "Classid cannot be zero");
return -EINVAL;
- if (q->handle == 0)
+ }
+ if (q->handle == 0) {
+ NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
return -ENOENT;
- err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
+ }
+ err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
if (err != 0)
return err;
} else {
@@ -1319,8 +1416,10 @@ replay:
if (clid != TC_H_ROOT) {
if (clid != TC_H_INGRESS) {
p = qdisc_lookup(dev, TC_H_MAJ(clid));
- if (!p)
+ if (!p) {
+ NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
return -ENOENT;
+ }
q = qdisc_leaf(p, clid);
} else if (dev_ingress_queue_create(dev)) {
q = dev_ingress_queue(dev)->qdisc_sleeping;
@@ -1335,20 +1434,31 @@ replay:
if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
if (tcm->tcm_handle) {
- if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
+ if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
return -EEXIST;
- if (TC_H_MIN(tcm->tcm_handle))
+ }
+ if (TC_H_MIN(tcm->tcm_handle)) {
+ NL_SET_ERR_MSG(extack, "Invalid minor handle");
return -EINVAL;
+ }
q = qdisc_lookup(dev, tcm->tcm_handle);
if (!q)
goto create_n_graft;
- if (n->nlmsg_flags & NLM_F_EXCL)
+ if (n->nlmsg_flags & NLM_F_EXCL) {
+ NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
return -EEXIST;
- if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
+ }
+ if (tca[TCA_KIND] &&
+ nla_strcmp(tca[TCA_KIND], q->ops->id)) {
+ NL_SET_ERR_MSG(extack, "Invalid qdisc name");
return -EINVAL;
+ }
if (q == p ||
- (p && check_loop(q, p, 0)))
+ (p && check_loop(q, p, 0))) {
+ NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
return -ELOOP;
+ }
qdisc_refcount_inc(q);
goto graft;
} else {
@@ -1383,33 +1493,45 @@ replay:
}
}
} else {
- if (!tcm->tcm_handle)
+ if (!tcm->tcm_handle) {
+ NL_SET_ERR_MSG(extack, "Handle cannot be zero");
return -EINVAL;
+ }
q = qdisc_lookup(dev, tcm->tcm_handle);
}
/* Change qdisc parameters */
- if (q == NULL)
+ if (!q) {
+ NL_SET_ERR_MSG(extack, "Specified qdisc not found");
return -ENOENT;
- if (n->nlmsg_flags & NLM_F_EXCL)
+ }
+ if (n->nlmsg_flags & NLM_F_EXCL) {
+ NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
return -EEXIST;
- if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
+ }
+ if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
+ NL_SET_ERR_MSG(extack, "Invalid qdisc name");
return -EINVAL;
- err = qdisc_change(q, tca);
+ }
+ err = qdisc_change(q, tca, extack);
if (err == 0)
qdisc_notify(net, skb, n, clid, NULL, q);
return err;
create_n_graft:
- if (!(n->nlmsg_flags & NLM_F_CREATE))
+ if (!(n->nlmsg_flags & NLM_F_CREATE)) {
+ NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
return -ENOENT;
+ }
if (clid == TC_H_INGRESS) {
- if (dev_ingress_queue(dev))
+ if (dev_ingress_queue(dev)) {
q = qdisc_create(dev, dev_ingress_queue(dev), p,
tcm->tcm_parent, tcm->tcm_parent,
- tca, &err);
- else
+ tca, &err, extack);
+ } else {
+ NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
err = -ENOENT;
+ }
} else {
struct netdev_queue *dev_queue;
@@ -1422,7 +1544,7 @@ create_n_graft:
q = qdisc_create(dev, dev_queue, p,
tcm->tcm_parent, tcm->tcm_handle,
- tca, &err);
+ tca, &err, extack);
}
if (q == NULL) {
if (err == -EAGAIN)
@@ -1431,7 +1553,7 @@ create_n_graft:
}
graft:
- err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
+ err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
if (err) {
if (q)
qdisc_destroy(q);
@@ -1683,7 +1805,7 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
cl = cops->find(q, portid);
if (!cl)
return;
- block = cops->tcf_block(q, cl);
+ block = cops->tcf_block(q, cl, NULL);
if (!block)
return;
list_for_each_entry(chain, &block->chain_list, list) {
@@ -1827,10 +1949,15 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
}
}
+ if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
+ NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
+ return -EOPNOTSUPP;
+ }
+
new_cl = cl;
err = -EOPNOTSUPP;
if (cops->change)
- err = cops->change(q, clid, portid, tca, &new_cl);
+ err = cops->change(q, clid, portid, tca, &new_cl, extack);
if (err == 0) {
tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
/* We just create a new class, need to do reverse binding. */
@@ -1966,7 +2093,6 @@ static int psched_open(struct inode *inode, struct file *file)
}
static const struct file_operations psched_fops = {
- .owner = THIS_MODULE,
.open = psched_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 2dbd249c0b2f..cd49afca9617 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -82,7 +82,8 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
}
static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
- struct Qdisc *new, struct Qdisc **old)
+ struct Qdisc *new, struct Qdisc **old,
+ struct netlink_ext_ack *extack)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)arg;
@@ -191,7 +192,8 @@ static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = {
};
static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
- struct nlattr **tca, unsigned long *arg)
+ struct nlattr **tca, unsigned long *arg,
+ struct netlink_ext_ack *extack)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
@@ -281,13 +283,15 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
goto err_out;
}
- error = tcf_block_get(&flow->block, &flow->filter_list, sch);
+ error = tcf_block_get(&flow->block, &flow->filter_list, sch,
+ extack);
if (error) {
kfree(flow);
goto err_out;
}
- flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
+ flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
+ extack);
if (!flow->q)
flow->q = &noop_qdisc;
pr_debug("atm_tc_change: qdisc %p\n", flow->q);
@@ -356,7 +360,8 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
}
}
-static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl)
+static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)cl;
@@ -531,7 +536,8 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
return p->link.q->ops->peek(p->link.q);
}
-static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
+static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
int err;
@@ -541,12 +547,13 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
INIT_LIST_HEAD(&p->link.list);
list_add(&p->link.list, &p->flows);
p->link.q = qdisc_create_dflt(sch->dev_queue,
- &pfifo_qdisc_ops, sch->handle);
+ &pfifo_qdisc_ops, sch->handle, extack);
if (!p->link.q)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
- err = tcf_block_get(&p->link.block, &p->link.filter_list, sch);
+ err = tcf_block_get(&p->link.block, &p->link.filter_list, sch,
+ extack);
if (err)
return err;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 6361be7881f1..f42025d53cfe 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1132,7 +1132,8 @@ static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
[TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
};
-static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
+static int cbq_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_CBQ_MAX + 1];
@@ -1143,30 +1144,39 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
q->delay_timer.function = cbq_undelay;
- if (!opt)
+ if (!opt) {
+ NL_SET_ERR_MSG(extack, "CBQ options are required for this operation");
return -EINVAL;
+ }
- err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL);
+ err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack);
if (err < 0)
return err;
- if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
+ if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) {
+ NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete");
return -EINVAL;
+ }
r = nla_data(tb[TCA_CBQ_RATE]);
- if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
+ q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack);
+ if (!q->link.R_tab)
return -EINVAL;
+ err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack);
+ if (err)
+ goto put_rtab;
+
err = qdisc_class_hash_init(&q->clhash);
if (err < 0)
- goto put_rtab;
+ goto put_block;
q->link.sibling = &q->link;
q->link.common.classid = sch->handle;
q->link.qdisc = sch;
q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- sch->handle);
+ sch->handle, NULL);
if (!q->link.q)
q->link.q = &noop_qdisc;
else
@@ -1194,6 +1204,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
cbq_addprio(q, &q->link);
return 0;
+put_block:
+ tcf_block_put(q->link.block);
+
put_rtab:
qdisc_put_rtab(q->link.R_tab);
return err;
@@ -1362,13 +1375,13 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
}
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old)
+ struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct cbq_class *cl = (struct cbq_class *)arg;
if (new == NULL) {
- new = qdisc_create_dflt(sch->dev_queue,
- &pfifo_qdisc_ops, cl->common.classid);
+ new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ cl->common.classid, extack);
if (new == NULL)
return -ENOBUFS;
}
@@ -1443,7 +1456,7 @@ static void cbq_destroy(struct Qdisc *sch)
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, struct netlink_ext_ack *extack)
{
int err;
struct cbq_sched_data *q = qdisc_priv(sch);
@@ -1453,29 +1466,37 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
struct cbq_class *parent;
struct qdisc_rate_table *rtab = NULL;
- if (opt == NULL)
+ if (!opt) {
+ NL_SET_ERR_MSG(extack, "Mandatory qdisc options missing");
return -EINVAL;
+ }
- err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL);
+ err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack);
if (err < 0)
return err;
- if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE])
+ if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) {
+ NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params");
return -EOPNOTSUPP;
+ }
if (cl) {
/* Check parent */
if (parentid) {
if (cl->tparent &&
- cl->tparent->common.classid != parentid)
+ cl->tparent->common.classid != parentid) {
+ NL_SET_ERR_MSG(extack, "Invalid parent id");
return -EINVAL;
- if (!cl->tparent && parentid != TC_H_ROOT)
+ }
+ if (!cl->tparent && parentid != TC_H_ROOT) {
+ NL_SET_ERR_MSG(extack, "Parent must be root");
return -EINVAL;
+ }
}
if (tb[TCA_CBQ_RATE]) {
rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
- tb[TCA_CBQ_RTAB]);
+ tb[TCA_CBQ_RTAB], extack);
if (rtab == NULL)
return -EINVAL;
}
@@ -1487,6 +1508,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
+ NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
qdisc_put_rtab(rtab);
return err;
}
@@ -1525,19 +1547,23 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (parentid == TC_H_ROOT)
return -EINVAL;
- if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
- tb[TCA_CBQ_LSSOPT] == NULL)
+ if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) {
+ NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing");
return -EINVAL;
+ }
- rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
+ rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB],
+ extack);
if (rtab == NULL)
return -EINVAL;
if (classid) {
err = -EINVAL;
if (TC_H_MAJ(classid ^ sch->handle) ||
- cbq_class_lookup(q, classid))
+ cbq_class_lookup(q, classid)) {
+ NL_SET_ERR_MSG(extack, "Specified class not found");
goto failure;
+ }
} else {
int i;
classid = TC_H_MAKE(sch->handle, 0x8000);
@@ -1549,8 +1575,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
break;
}
err = -ENOSR;
- if (i >= 0x8000)
+ if (i >= 0x8000) {
+ NL_SET_ERR_MSG(extack, "Unable to generate classid");
goto failure;
+ }
classid = classid|q->hgenerator;
}
@@ -1558,8 +1586,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (parentid) {
parent = cbq_class_lookup(q, parentid);
err = -EINVAL;
- if (parent == NULL)
+ if (!parent) {
+ NL_SET_ERR_MSG(extack, "Failed to find parentid");
goto failure;
+ }
}
err = -ENOBUFS;
@@ -1567,7 +1597,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (cl == NULL)
goto failure;
- err = tcf_block_get(&cl->block, &cl->filter_list, sch);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
return err;
@@ -1579,6 +1609,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
+ NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
tcf_block_put(cl->block);
kfree(cl);
goto failure;
@@ -1587,7 +1618,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cl->R_tab = rtab;
rtab = NULL;
- cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
+ cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
+ NULL);
if (!cl->q)
cl->q = &noop_qdisc;
else
@@ -1671,7 +1703,8 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
return 0;
}
-static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg)
+static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg,
+ struct netlink_ext_ack *extack)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 7a72980c1509..cdd96b9a27bc 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -219,14 +219,17 @@ static void cbs_disable_offload(struct net_device *dev,
}
static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
- const struct tc_cbs_qopt *opt)
+ const struct tc_cbs_qopt *opt,
+ struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
struct tc_cbs_qopt_offload cbs = { };
int err;
- if (!ops->ndo_setup_tc)
+ if (!ops->ndo_setup_tc) {
+ NL_SET_ERR_MSG(extack, "Specified device does not support cbs offload");
return -EOPNOTSUPP;
+ }
cbs.queue = q->queue;
@@ -237,8 +240,10 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
cbs.sendslope = opt->sendslope;
err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
- if (err < 0)
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Specified device failed to setup cbs hardware offload");
return err;
+ }
q->enqueue = cbs_enqueue_offload;
q->dequeue = cbs_dequeue_offload;
@@ -246,7 +251,8 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
return 0;
}
-static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
+static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct cbs_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
@@ -254,12 +260,14 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
struct tc_cbs_qopt *qopt;
int err;
- err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
+ err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, extack);
if (err < 0)
return err;
- if (!tb[TCA_CBS_PARMS])
+ if (!tb[TCA_CBS_PARMS]) {
+ NL_SET_ERR_MSG(extack, "Missing CBS parameter which are mandatory");
return -EINVAL;
+ }
qopt = nla_data(tb[TCA_CBS_PARMS]);
@@ -276,7 +284,7 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
cbs_disable_offload(dev, q);
} else {
- err = cbs_enable_offload(dev, q, qopt);
+ err = cbs_enable_offload(dev, q, qopt, extack);
if (err < 0)
return err;
}
@@ -291,13 +299,16 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
return 0;
}
-static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
+static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct cbs_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- if (!opt)
+ if (!opt) {
+ NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory");
return -EINVAL;
+ }
q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
@@ -306,7 +317,7 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch);
- return cbs_change(sch, opt);
+ return cbs_change(sch, opt, extack);
}
static void cbs_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index b30a2c70bd48..eafc0d17d174 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -344,7 +344,8 @@ static void choke_free(void *addr)
kvfree(addr);
}
-static int choke_change(struct Qdisc *sch, struct nlattr *opt)
+static int choke_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct choke_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_CHOKE_MAX + 1];
@@ -369,6 +370,9 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
ctl = nla_data(tb[TCA_CHOKE_PARMS]);
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ return -EINVAL;
+
if (ctl->limit > CHOKE_MAX_QUEUE)
return -EINVAL;
@@ -428,9 +432,10 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
return 0;
}
-static int choke_init(struct Qdisc *sch, struct nlattr *opt)
+static int choke_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
- return choke_change(sch, opt);
+ return choke_change(sch, opt, extack);
}
static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index c518a1efcb9d..17cd81f84b5d 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -130,7 +130,8 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
[TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 },
};
-static int codel_change(struct Qdisc *sch, struct nlattr *opt)
+static int codel_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct codel_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_CODEL_MAX + 1];
@@ -184,7 +185,8 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
return 0;
}
-static int codel_init(struct Qdisc *sch, struct nlattr *opt)
+static int codel_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct codel_sched_data *q = qdisc_priv(sch);
@@ -196,7 +198,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt)
q->params.mtu = psched_mtu(qdisc_dev(sch));
if (opt) {
- int err = codel_change(sch, opt);
+ int err = codel_change(sch, opt, extack);
if (err)
return err;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 5bbcef3dcd8c..e0b0cf8a9939 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -64,7 +64,8 @@ static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
};
static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- struct nlattr **tca, unsigned long *arg)
+ struct nlattr **tca, unsigned long *arg,
+ struct netlink_ext_ack *extack)
{
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl = (struct drr_class *)*arg;
@@ -73,17 +74,21 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
u32 quantum;
int err;
- if (!opt)
+ if (!opt) {
+ NL_SET_ERR_MSG(extack, "DRR options are required for this operation");
return -EINVAL;
+ }
- err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, NULL);
+ err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, extack);
if (err < 0)
return err;
if (tb[TCA_DRR_QUANTUM]) {
quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]);
- if (quantum == 0)
+ if (quantum == 0) {
+ NL_SET_ERR_MSG(extack, "Specified DRR quantum cannot be zero");
return -EINVAL;
+ }
} else
quantum = psched_mtu(qdisc_dev(sch));
@@ -94,8 +99,10 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
- if (err)
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Failed to replace estimator");
return err;
+ }
}
sch_tree_lock(sch);
@@ -113,7 +120,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->common.classid = classid;
cl->quantum = quantum;
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
- &pfifo_qdisc_ops, classid);
+ &pfifo_qdisc_ops, classid,
+ NULL);
if (cl->qdisc == NULL)
cl->qdisc = &noop_qdisc;
else
@@ -125,6 +133,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
+ NL_SET_ERR_MSG(extack, "Failed to replace estimator");
qdisc_destroy(cl->qdisc);
kfree(cl);
return err;
@@ -172,12 +181,15 @@ static unsigned long drr_search_class(struct Qdisc *sch, u32 classid)
return (unsigned long)drr_find_class(sch, classid);
}
-static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl)
+static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
struct drr_sched *q = qdisc_priv(sch);
- if (cl)
+ if (cl) {
+ NL_SET_ERR_MSG(extack, "DRR classid must be zero");
return NULL;
+ }
return q->block;
}
@@ -201,13 +213,14 @@ static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg)
}
static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
- struct Qdisc *new, struct Qdisc **old)
+ struct Qdisc *new, struct Qdisc **old,
+ struct netlink_ext_ack *extack)
{
struct drr_class *cl = (struct drr_class *)arg;
if (new == NULL) {
- new = qdisc_create_dflt(sch->dev_queue,
- &pfifo_qdisc_ops, cl->common.classid);
+ new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ cl->common.classid, NULL);
if (new == NULL)
new = &noop_qdisc;
}
@@ -408,12 +421,13 @@ out:
return NULL;
}
-static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
+static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct drr_sched *q = qdisc_priv(sch);
int err;
- err = tcf_block_get(&q->block, &q->filter_list, sch);
+ err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
if (err)
return err;
err = qdisc_class_hash_init(&q->clhash);
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index fb4fb71c68cf..049714c57075 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -61,7 +61,8 @@ static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
/* ------------------------- Class/flow operations ------------------------- */
static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
- struct Qdisc *new, struct Qdisc **old)
+ struct Qdisc *new, struct Qdisc **old,
+ struct netlink_ext_ack *extack)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
@@ -70,7 +71,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
if (new == NULL) {
new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- sch->handle);
+ sch->handle, NULL);
if (new == NULL)
new = &noop_qdisc;
}
@@ -112,7 +113,8 @@ static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = {
};
static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
- struct nlattr **tca, unsigned long *arg)
+ struct nlattr **tca, unsigned long *arg,
+ struct netlink_ext_ack *extack)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
struct nlattr *opt = tca[TCA_OPTIONS];
@@ -184,7 +186,8 @@ ignore:
}
}
-static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl)
+static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
@@ -330,7 +333,8 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch)
return p->q->ops->peek(p->q);
}
-static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
+static int dsmark_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
struct nlattr *tb[TCA_DSMARK_MAX + 1];
@@ -344,7 +348,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
goto errout;
- err = tcf_block_get(&p->block, &p->filter_list, sch);
+ err = tcf_block_get(&p->block, &p->filter_list, sch, extack);
if (err)
return err;
@@ -377,7 +381,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
p->default_index = default_index;
p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
- p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle);
+ p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle,
+ NULL);
if (p->q == NULL)
p->q = &noop_qdisc;
else
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 1e37247656f8..24893d3b5d22 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -55,7 +55,8 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return NET_XMIT_CN;
}
-static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
+static int fifo_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
bool bypass;
bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
@@ -157,7 +158,7 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit)
nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
- ret = q->ops->change(q, nla);
+ ret = q->ops->change(q, nla, NULL);
kfree(nla);
}
return ret;
@@ -165,12 +166,14 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit)
EXPORT_SYMBOL(fifo_set_limit);
struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
- unsigned int limit)
+ unsigned int limit,
+ struct netlink_ext_ack *extack)
{
struct Qdisc *q;
int err = -ENOMEM;
- q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1));
+ q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1),
+ extack);
if (q) {
err = fifo_set_limit(q, limit);
if (err < 0) {
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 263d16e3219e..a366e4c9413a 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -685,7 +685,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
};
-static int fq_change(struct Qdisc *sch, struct nlattr *opt)
+static int fq_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct fq_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_MAX + 1];
@@ -788,7 +789,8 @@ static void fq_destroy(struct Qdisc *sch)
qdisc_watchdog_cancel(&q->watchdog);
}
-static int fq_init(struct Qdisc *sch, struct nlattr *opt)
+static int fq_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct fq_sched_data *q = qdisc_priv(sch);
int err;
@@ -811,7 +813,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch);
if (opt)
- err = fq_change(sch, opt);
+ err = fq_change(sch, opt, extack);
else
err = fq_resize(sch, q->fq_trees_log);
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 0305d791ea94..22fa13cf5d8b 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -377,7 +377,8 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
[TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 },
};
-static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
+static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
@@ -458,7 +459,8 @@ static void fq_codel_destroy(struct Qdisc *sch)
kvfree(q->flows);
}
-static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
+static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
int i;
@@ -477,12 +479,12 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
q->cparams.mtu = psched_mtu(qdisc_dev(sch));
if (opt) {
- int err = fq_codel_change(sch, opt);
+ int err = fq_codel_change(sch, opt, extack);
if (err)
return err;
}
- err = tcf_block_get(&q->block, &q->filter_list, sch);
+ err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
if (err)
return err;
@@ -595,7 +597,8 @@ static void fq_codel_unbind(struct Qdisc *q, unsigned long cl)
{
}
-static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl)
+static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 3839cbbdc32b..190570f21b20 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -26,10 +26,13 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
+#include <linux/skb_array.h>
+#include <linux/if_macvlan.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <trace/events/qdisc.h>
+#include <net/xfrm.h>
/* Qdisc to use by default */
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
@@ -46,17 +49,115 @@ EXPORT_SYMBOL(default_qdisc_ops);
* - updates to tree and tree walking are only done under the rtnl mutex.
*/
-static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
+{
+ const struct netdev_queue *txq = q->dev_queue;
+ spinlock_t *lock = NULL;
+ struct sk_buff *skb;
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ lock = qdisc_lock(q);
+ spin_lock(lock);
+ }
+
+ skb = skb_peek(&q->skb_bad_txq);
+ if (skb) {
+ /* check the reason of requeuing without tx lock first */
+ txq = skb_get_tx_queue(txq->dev, skb);
+ if (!netif_xmit_frozen_or_stopped(txq)) {
+ skb = __skb_dequeue(&q->skb_bad_txq);
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_backlog_dec(q, skb);
+ qdisc_qstats_cpu_qlen_dec(q);
+ } else {
+ qdisc_qstats_backlog_dec(q, skb);
+ q->q.qlen--;
+ }
+ } else {
+ skb = NULL;
+ }
+ }
+
+ if (lock)
+ spin_unlock(lock);
+
+ return skb;
+}
+
+static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
+{
+ struct sk_buff *skb = skb_peek(&q->skb_bad_txq);
+
+ if (unlikely(skb))
+ skb = __skb_dequeue_bad_txq(q);
+
+ return skb;
+}
+
+static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
+ struct sk_buff *skb)
+{
+ spinlock_t *lock = NULL;
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ lock = qdisc_lock(q);
+ spin_lock(lock);
+ }
+
+ __skb_queue_tail(&q->skb_bad_txq, skb);
+
+ if (lock)
+ spin_unlock(lock);
+}
+
+static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
- q->gso_skb = skb;
- q->qstats.requeues++;
- qdisc_qstats_backlog_inc(q, skb);
- q->q.qlen++; /* it's still part of the queue */
+ while (skb) {
+ struct sk_buff *next = skb->next;
+
+ __skb_queue_tail(&q->gso_skb, skb);
+ q->qstats.requeues++;
+ qdisc_qstats_backlog_inc(q, skb);
+ q->q.qlen++; /* it's still part of the queue */
+
+ skb = next;
+ }
+ __netif_schedule(q);
+
+ return 0;
+}
+
+static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
+{
+ spinlock_t *lock = qdisc_lock(q);
+
+ spin_lock(lock);
+ while (skb) {
+ struct sk_buff *next = skb->next;
+
+ __skb_queue_tail(&q->gso_skb, skb);
+
+ qdisc_qstats_cpu_requeues_inc(q);
+ qdisc_qstats_cpu_backlog_inc(q, skb);
+ qdisc_qstats_cpu_qlen_inc(q);
+
+ skb = next;
+ }
+ spin_unlock(lock);
+
__netif_schedule(q);
return 0;
}
+static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+{
+ if (q->flags & TCQ_F_NOLOCK)
+ return dev_requeue_skb_locked(skb, q);
+ else
+ return __dev_requeue_skb(skb, q);
+}
+
static void try_bulk_dequeue_skb(struct Qdisc *q,
struct sk_buff *skb,
const struct netdev_queue *txq,
@@ -94,9 +195,15 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
if (!nskb)
break;
if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
- q->skb_bad_txq = nskb;
- qdisc_qstats_backlog_inc(q, nskb);
- q->q.qlen++;
+ qdisc_enqueue_skb_bad_txq(q, nskb);
+
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_backlog_inc(q, nskb);
+ qdisc_qstats_cpu_qlen_inc(q);
+ } else {
+ qdisc_qstats_backlog_inc(q, nskb);
+ q->q.qlen++;
+ }
break;
}
skb->next = nskb;
@@ -112,40 +219,62 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
int *packets)
{
- struct sk_buff *skb = q->gso_skb;
const struct netdev_queue *txq = q->dev_queue;
+ struct sk_buff *skb = NULL;
*packets = 1;
- if (unlikely(skb)) {
+ if (unlikely(!skb_queue_empty(&q->gso_skb))) {
+ spinlock_t *lock = NULL;
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ lock = qdisc_lock(q);
+ spin_lock(lock);
+ }
+
+ skb = skb_peek(&q->gso_skb);
+
+ /* skb may be null if another cpu pulls gso_skb off in between
+ * empty check and lock.
+ */
+ if (!skb) {
+ if (lock)
+ spin_unlock(lock);
+ goto validate;
+ }
+
/* skb in gso_skb were already validated */
*validate = false;
+ if (xfrm_offload(skb))
+ *validate = true;
/* check the reason of requeuing without tx lock first */
txq = skb_get_tx_queue(txq->dev, skb);
if (!netif_xmit_frozen_or_stopped(txq)) {
- q->gso_skb = NULL;
- qdisc_qstats_backlog_dec(q, skb);
- q->q.qlen--;
- } else
+ skb = __skb_dequeue(&q->gso_skb);
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_backlog_dec(q, skb);
+ qdisc_qstats_cpu_qlen_dec(q);
+ } else {
+ qdisc_qstats_backlog_dec(q, skb);
+ q->q.qlen--;
+ }
+ } else {
skb = NULL;
- goto trace;
- }
- *validate = true;
- skb = q->skb_bad_txq;
- if (unlikely(skb)) {
- /* check the reason of requeuing without tx lock first */
- txq = skb_get_tx_queue(txq->dev, skb);
- if (!netif_xmit_frozen_or_stopped(txq)) {
- q->skb_bad_txq = NULL;
- qdisc_qstats_backlog_dec(q, skb);
- q->q.qlen--;
- goto bulk;
}
- skb = NULL;
+ if (lock)
+ spin_unlock(lock);
goto trace;
}
- if (!(q->flags & TCQ_F_ONETXQUEUE) ||
- !netif_xmit_frozen_or_stopped(txq))
- skb = q->dequeue(q);
+validate:
+ *validate = true;
+
+ if ((q->flags & TCQ_F_ONETXQUEUE) &&
+ netif_xmit_frozen_or_stopped(txq))
+ return skb;
+
+ skb = qdisc_dequeue_skb_bad_txq(q);
+ if (unlikely(skb))
+ goto bulk;
+ skb = q->dequeue(q);
if (skb) {
bulk:
if (qdisc_may_bulk(q))
@@ -164,21 +293,33 @@ trace:
* only one CPU can execute this function.
*
* Returns to the caller:
- * 0 - queue is empty or throttled.
- * >0 - queue is not empty.
+ * false - hardware queue frozen backoff
+ * true - feel free to send more pkts
*/
-int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
- struct net_device *dev, struct netdev_queue *txq,
- spinlock_t *root_lock, bool validate)
+bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+ struct net_device *dev, struct netdev_queue *txq,
+ spinlock_t *root_lock, bool validate)
{
int ret = NETDEV_TX_BUSY;
+ bool again = false;
/* And release qdisc */
- spin_unlock(root_lock);
+ if (root_lock)
+ spin_unlock(root_lock);
/* Note that we validate skb (GSO, checksum, ...) outside of locks */
if (validate)
- skb = validate_xmit_skb_list(skb, dev);
+ skb = validate_xmit_skb_list(skb, dev, &again);
+
+#ifdef CONFIG_XFRM_OFFLOAD
+ if (unlikely(again)) {
+ if (root_lock)
+ spin_lock(root_lock);
+
+ dev_requeue_skb(skb, q);
+ return false;
+ }
+#endif
if (likely(skb)) {
HARD_TX_LOCK(dev, txq, smp_processor_id());
@@ -187,27 +328,28 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
HARD_TX_UNLOCK(dev, txq);
} else {
- spin_lock(root_lock);
- return qdisc_qlen(q);
+ if (root_lock)
+ spin_lock(root_lock);
+ return true;
}
- spin_lock(root_lock);
- if (dev_xmit_complete(ret)) {
- /* Driver sent out skb successfully or skb was consumed */
- ret = qdisc_qlen(q);
- } else {
+ if (root_lock)
+ spin_lock(root_lock);
+
+ if (!dev_xmit_complete(ret)) {
/* Driver returned NETDEV_TX_BUSY - requeue skb */
if (unlikely(ret != NETDEV_TX_BUSY))
net_warn_ratelimited("BUG %s code %d qlen %d\n",
dev->name, ret, q->q.qlen);
- ret = dev_requeue_skb(skb, q);
+ dev_requeue_skb(skb, q);
+ return false;
}
if (ret && netif_xmit_frozen_or_stopped(txq))
- ret = 0;
+ return false;
- return ret;
+ return true;
}
/*
@@ -229,20 +371,22 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
* >0 - queue is not empty.
*
*/
-static inline int qdisc_restart(struct Qdisc *q, int *packets)
+static inline bool qdisc_restart(struct Qdisc *q, int *packets)
{
+ spinlock_t *root_lock = NULL;
struct netdev_queue *txq;
struct net_device *dev;
- spinlock_t *root_lock;
struct sk_buff *skb;
bool validate;
/* Dequeue packet */
skb = dequeue_skb(q, &validate, packets);
if (unlikely(!skb))
- return 0;
+ return false;
+
+ if (!(q->flags & TCQ_F_NOLOCK))
+ root_lock = qdisc_lock(q);
- root_lock = qdisc_lock(q);
dev = qdisc_dev(q);
txq = skb_get_tx_queue(dev, skb);
@@ -266,8 +410,6 @@ void __qdisc_run(struct Qdisc *q)
break;
}
}
-
- qdisc_run_end(q);
}
unsigned long dev_trans_start(struct net_device *dev)
@@ -277,6 +419,8 @@ unsigned long dev_trans_start(struct net_device *dev)
if (is_vlan_dev(dev))
dev = vlan_dev_real_dev(dev);
+ else if (netif_is_macvlan(dev))
+ dev = macvlan_dev_real_dev(dev);
res = netdev_get_tx_queue(dev, 0)->trans_start;
for (i = 1; i < dev->num_tx_queues; i++) {
val = netdev_get_tx_queue(dev, i)->trans_start;
@@ -366,7 +510,7 @@ void netif_carrier_on(struct net_device *dev)
if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
if (dev->reg_state == NETREG_UNINITIALIZED)
return;
- atomic_inc(&dev->carrier_changes);
+ atomic_inc(&dev->carrier_up_count);
linkwatch_fire_event(dev);
if (netif_running(dev))
__netdev_watchdog_up(dev);
@@ -385,7 +529,7 @@ void netif_carrier_off(struct net_device *dev)
if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
if (dev->reg_state == NETREG_UNINITIALIZED)
return;
- atomic_inc(&dev->carrier_changes);
+ atomic_inc(&dev->carrier_down_count);
linkwatch_fire_event(dev);
}
}
@@ -434,7 +578,8 @@ struct Qdisc noop_qdisc = {
};
EXPORT_SYMBOL(noop_qdisc);
-static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt)
+static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
/* register_qdisc() assigns a default of noop_enqueue if unset,
* but __dev_queue_xmit() treats noqueue only as such
@@ -465,93 +610,99 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = {
/*
* Private data for a pfifo_fast scheduler containing:
- * - queues for the three band
- * - bitmap indicating which of the bands contain skbs
+ * - rings for priority bands
*/
struct pfifo_fast_priv {
- u32 bitmap;
- struct qdisc_skb_head q[PFIFO_FAST_BANDS];
+ struct skb_array q[PFIFO_FAST_BANDS];
};
-/*
- * Convert a bitmap to the first band number where an skb is queued, where:
- * bitmap=0 means there are no skbs on any band.
- * bitmap=1 means there is an skb on band 0.
- * bitmap=7 means there are skbs on all 3 bands, etc.
- */
-static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};
-
-static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv,
- int band)
+static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
+ int band)
{
- return priv->q + band;
+ return &priv->q[band];
}
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free)
{
- if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) {
- int band = prio2band[skb->priority & TC_PRIO_MAX];
- struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- struct qdisc_skb_head *list = band2list(priv, band);
-
- priv->bitmap |= (1 << band);
- qdisc->q.qlen++;
- return __qdisc_enqueue_tail(skb, qdisc, list);
- }
+ int band = prio2band[skb->priority & TC_PRIO_MAX];
+ struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
+ struct skb_array *q = band2list(priv, band);
+ int err;
- return qdisc_drop(skb, qdisc, to_free);
+ err = skb_array_produce(q, skb);
+
+ if (unlikely(err))
+ return qdisc_drop_cpu(skb, qdisc, to_free);
+
+ qdisc_qstats_cpu_qlen_inc(qdisc);
+ qdisc_qstats_cpu_backlog_inc(qdisc, skb);
+ return NET_XMIT_SUCCESS;
}
static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- int band = bitmap2band[priv->bitmap];
+ struct sk_buff *skb = NULL;
+ int band;
- if (likely(band >= 0)) {
- struct qdisc_skb_head *qh = band2list(priv, band);
- struct sk_buff *skb = __qdisc_dequeue_head(qh);
+ for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
+ struct skb_array *q = band2list(priv, band);
- if (likely(skb != NULL)) {
- qdisc_qstats_backlog_dec(qdisc, skb);
- qdisc_bstats_update(qdisc, skb);
- }
+ if (__skb_array_empty(q))
+ continue;
- qdisc->q.qlen--;
- if (qh->qlen == 0)
- priv->bitmap &= ~(1 << band);
-
- return skb;
+ skb = skb_array_consume_bh(q);
+ }
+ if (likely(skb)) {
+ qdisc_qstats_cpu_backlog_dec(qdisc, skb);
+ qdisc_bstats_cpu_update(qdisc, skb);
+ qdisc_qstats_cpu_qlen_dec(qdisc);
}
- return NULL;
+ return skb;
}
static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- int band = bitmap2band[priv->bitmap];
+ struct sk_buff *skb = NULL;
+ int band;
- if (band >= 0) {
- struct qdisc_skb_head *qh = band2list(priv, band);
+ for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
+ struct skb_array *q = band2list(priv, band);
- return qh->head;
+ skb = __skb_array_peek(q);
}
- return NULL;
+ return skb;
}
static void pfifo_fast_reset(struct Qdisc *qdisc)
{
- int prio;
+ int i, band;
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
- for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- __qdisc_reset_queue(band2list(priv, prio));
+ for (band = 0; band < PFIFO_FAST_BANDS; band++) {
+ struct skb_array *q = band2list(priv, band);
+ struct sk_buff *skb;
- priv->bitmap = 0;
- qdisc->qstats.backlog = 0;
- qdisc->q.qlen = 0;
+ /* NULL ring is possible if destroy path is due to a failed
+ * skb_array_init() in pfifo_fast_init() case.
+ */
+ if (!q->ring.queue)
+ continue;
+
+ while ((skb = skb_array_consume_bh(q)) != NULL)
+ kfree_skb(skb);
+ }
+
+ for_each_possible_cpu(i) {
+ struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
+
+ q->backlog = 0;
+ q->qlen = 0;
+ }
}
static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
@@ -567,19 +718,68 @@ nla_put_failure:
return -1;
}
-static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
+static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
- int prio;
+ unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
+ int prio;
- for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- qdisc_skb_head_init(band2list(priv, prio));
+ /* guard against zero length rings */
+ if (!qlen)
+ return -EINVAL;
+
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
+ struct skb_array *q = band2list(priv, prio);
+ int err;
+
+ err = skb_array_init(q, qlen, GFP_KERNEL);
+ if (err)
+ return -ENOMEM;
+ }
/* Can by-pass the queue discipline */
qdisc->flags |= TCQ_F_CAN_BYPASS;
return 0;
}
+static void pfifo_fast_destroy(struct Qdisc *sch)
+{
+ struct pfifo_fast_priv *priv = qdisc_priv(sch);
+ int prio;
+
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
+ struct skb_array *q = band2list(priv, prio);
+
+ /* NULL ring is possible if destroy path is due to a failed
+ * skb_array_init() in pfifo_fast_init() case.
+ */
+ if (!q->ring.queue)
+ continue;
+ /* Destroy ring but no need to kfree_skb because a call to
+ * pfifo_fast_reset() has already done that work.
+ */
+ ptr_ring_cleanup(&q->ring, NULL);
+ }
+}
+
+static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch,
+ unsigned int new_len)
+{
+ struct pfifo_fast_priv *priv = qdisc_priv(sch);
+ struct skb_array *bands[PFIFO_FAST_BANDS];
+ int prio;
+
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
+ struct skb_array *q = band2list(priv, prio);
+
+ bands[prio] = q;
+ }
+
+ return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len,
+ GFP_KERNEL);
+}
+
struct Qdisc_ops pfifo_fast_ops __read_mostly = {
.id = "pfifo_fast",
.priv_size = sizeof(struct pfifo_fast_priv),
@@ -587,9 +787,12 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
.dequeue = pfifo_fast_dequeue,
.peek = pfifo_fast_peek,
.init = pfifo_fast_init,
+ .destroy = pfifo_fast_destroy,
.reset = pfifo_fast_reset,
.dump = pfifo_fast_dump,
+ .change_tx_queue_len = pfifo_fast_change_tx_queue_len,
.owner = THIS_MODULE,
+ .static_flags = TCQ_F_NOLOCK | TCQ_F_CPUSTATS,
};
EXPORT_SYMBOL(pfifo_fast_ops);
@@ -597,7 +800,8 @@ static struct lock_class_key qdisc_tx_busylock;
static struct lock_class_key qdisc_running_key;
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
- const struct Qdisc_ops *ops)
+ const struct Qdisc_ops *ops,
+ struct netlink_ext_ack *extack)
{
void *p;
struct Qdisc *sch;
@@ -606,6 +810,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
struct net_device *dev;
if (!dev_queue) {
+ NL_SET_ERR_MSG(extack, "No device queue given");
err = -EINVAL;
goto errout;
}
@@ -627,9 +832,24 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
sch->padded = (char *) sch - (char *) p;
}
+ __skb_queue_head_init(&sch->gso_skb);
+ __skb_queue_head_init(&sch->skb_bad_txq);
qdisc_skb_head_init(&sch->q);
spin_lock_init(&sch->q.lock);
+ if (ops->static_flags & TCQ_F_CPUSTATS) {
+ sch->cpu_bstats =
+ netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ if (!sch->cpu_bstats)
+ goto errout1;
+
+ sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+ if (!sch->cpu_qstats) {
+ free_percpu(sch->cpu_bstats);
+ goto errout1;
+ }
+ }
+
spin_lock_init(&sch->busylock);
lockdep_set_class(&sch->busylock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
@@ -639,6 +859,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
dev->qdisc_running_key ?: &qdisc_running_key);
sch->ops = ops;
+ sch->flags = ops->static_flags;
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
@@ -646,27 +867,32 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
refcount_set(&sch->refcnt, 1);
return sch;
+errout1:
+ kfree(p);
errout:
return ERR_PTR(err);
}
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
const struct Qdisc_ops *ops,
- unsigned int parentid)
+ unsigned int parentid,
+ struct netlink_ext_ack *extack)
{
struct Qdisc *sch;
- if (!try_module_get(ops->owner))
+ if (!try_module_get(ops->owner)) {
+ NL_SET_ERR_MSG(extack, "Failed to increase module reference counter");
return NULL;
+ }
- sch = qdisc_alloc(dev_queue, ops);
+ sch = qdisc_alloc(dev_queue, ops, extack);
if (IS_ERR(sch)) {
module_put(ops->owner);
return NULL;
}
sch->parent = parentid;
- if (!ops->init || ops->init(sch, NULL) == 0)
+ if (!ops->init || ops->init(sch, NULL, extack) == 0)
return sch;
qdisc_destroy(sch);
@@ -679,23 +905,27 @@ EXPORT_SYMBOL(qdisc_create_dflt);
void qdisc_reset(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
+ struct sk_buff *skb, *tmp;
if (ops->reset)
ops->reset(qdisc);
- kfree_skb(qdisc->skb_bad_txq);
- qdisc->skb_bad_txq = NULL;
+ skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
+ __skb_unlink(skb, &qdisc->gso_skb);
+ kfree_skb_list(skb);
+ }
- if (qdisc->gso_skb) {
- kfree_skb_list(qdisc->gso_skb);
- qdisc->gso_skb = NULL;
+ skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
+ __skb_unlink(skb, &qdisc->skb_bad_txq);
+ kfree_skb_list(skb);
}
+
qdisc->q.qlen = 0;
qdisc->qstats.backlog = 0;
}
EXPORT_SYMBOL(qdisc_reset);
-static void qdisc_free(struct Qdisc *qdisc)
+void qdisc_free(struct Qdisc *qdisc)
{
if (qdisc_is_percpu_stats(qdisc)) {
free_percpu(qdisc->cpu_bstats);
@@ -708,6 +938,7 @@ static void qdisc_free(struct Qdisc *qdisc)
void qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
+ struct sk_buff *skb, *tmp;
if (qdisc->flags & TCQ_F_BUILTIN ||
!refcount_dec_and_test(&qdisc->refcnt))
@@ -727,8 +958,16 @@ void qdisc_destroy(struct Qdisc *qdisc)
module_put(ops->owner);
dev_put(qdisc_dev(qdisc));
- kfree_skb_list(qdisc->gso_skb);
- kfree_skb(qdisc->skb_bad_txq);
+ skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
+ __skb_unlink(skb, &qdisc->gso_skb);
+ kfree_skb_list(skb);
+ }
+
+ skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
+ __skb_unlink(skb, &qdisc->skb_bad_txq);
+ kfree_skb_list(skb);
+ }
+
qdisc_free(qdisc);
}
EXPORT_SYMBOL(qdisc_destroy);
@@ -743,10 +982,6 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
root_lock = qdisc_lock(oqdisc);
spin_lock_bh(root_lock);
- /* Prune old scheduler */
- if (oqdisc && refcount_read(&oqdisc->refcnt) <= 1)
- qdisc_reset(oqdisc);
-
/* ... and graft new one */
if (qdisc == NULL)
qdisc = &noop_qdisc;
@@ -769,7 +1004,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
if (dev->priv_flags & IFF_NO_QUEUE)
ops = &noqueue_qdisc_ops;
- qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT);
+ qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
if (!qdisc) {
netdev_info(dev, "activation failed\n");
return;
@@ -792,7 +1027,7 @@ static void attach_default_qdiscs(struct net_device *dev)
dev->qdisc = txq->qdisc_sleeping;
qdisc_refcount_inc(dev->qdisc);
} else {
- qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
+ qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
if (qdisc) {
dev->qdisc = qdisc;
qdisc->ops->attach(qdisc);
@@ -882,14 +1117,18 @@ static bool some_qdisc_is_busy(struct net_device *dev)
dev_queue = netdev_get_tx_queue(dev, i);
q = dev_queue->qdisc_sleeping;
- root_lock = qdisc_lock(q);
- spin_lock_bh(root_lock);
+ if (q->flags & TCQ_F_NOLOCK) {
+ val = test_bit(__QDISC_STATE_SCHED, &q->state);
+ } else {
+ root_lock = qdisc_lock(q);
+ spin_lock_bh(root_lock);
- val = (qdisc_is_running(q) ||
- test_bit(__QDISC_STATE_SCHED, &q->state));
+ val = (qdisc_is_running(q) ||
+ test_bit(__QDISC_STATE_SCHED, &q->state));
- spin_unlock_bh(root_lock);
+ spin_unlock_bh(root_lock);
+ }
if (val)
return true;
@@ -897,6 +1136,16 @@ static bool some_qdisc_is_busy(struct net_device *dev)
return false;
}
+static void dev_qdisc_reset(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *none)
+{
+ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+
+ if (qdisc)
+ qdisc_reset(qdisc);
+}
+
/**
* dev_deactivate_many - deactivate transmissions on several devices
* @head: list of devices to deactivate
@@ -907,7 +1156,6 @@ static bool some_qdisc_is_busy(struct net_device *dev)
void dev_deactivate_many(struct list_head *head)
{
struct net_device *dev;
- bool sync_needed = false;
list_for_each_entry(dev, head, close_list) {
netdev_for_each_tx_queue(dev, dev_deactivate_queue,
@@ -917,20 +1165,25 @@ void dev_deactivate_many(struct list_head *head)
&noop_qdisc);
dev_watchdog_down(dev);
- sync_needed |= !dev->dismantle;
}
/* Wait for outstanding qdisc-less dev_queue_xmit calls.
* This is avoided if all devices are in dismantle phase :
* Caller will call synchronize_net() for us
*/
- if (sync_needed)
- synchronize_net();
+ synchronize_net();
/* Wait for outstanding qdisc_run calls. */
- list_for_each_entry(dev, head, close_list)
+ list_for_each_entry(dev, head, close_list) {
while (some_qdisc_is_busy(dev))
yield();
+ /* The new qdisc is assigned at this point so we can safely
+ * unwind stale skb lists and qdisc statistics
+ */
+ netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
+ if (dev_ingress_queue(dev))
+ dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
+ }
}
void dev_deactivate(struct net_device *dev)
@@ -943,6 +1196,39 @@ void dev_deactivate(struct net_device *dev)
}
EXPORT_SYMBOL(dev_deactivate);
+static int qdisc_change_tx_queue_len(struct net_device *dev,
+ struct netdev_queue *dev_queue)
+{
+ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+ const struct Qdisc_ops *ops = qdisc->ops;
+
+ if (ops->change_tx_queue_len)
+ return ops->change_tx_queue_len(qdisc, dev->tx_queue_len);
+ return 0;
+}
+
+int dev_qdisc_change_tx_queue_len(struct net_device *dev)
+{
+ bool up = dev->flags & IFF_UP;
+ unsigned int i;
+ int ret = 0;
+
+ if (up)
+ dev_deactivate(dev);
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]);
+
+ /* TODO: revert changes on a partial failure */
+ if (ret)
+ break;
+ }
+
+ if (up)
+ dev_activate(dev);
+ return ret;
+}
+
static void dev_init_scheduler_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_qdisc)
@@ -951,6 +1237,8 @@ static void dev_init_scheduler_queue(struct net_device *dev,
rcu_assign_pointer(dev_queue->qdisc, qdisc);
dev_queue->qdisc_sleeping = qdisc;
+ __skb_queue_head_init(&qdisc->gso_skb);
+ __skb_queue_head_init(&qdisc->skb_bad_txq);
}
void dev_init_scheduler(struct net_device *dev)
@@ -1037,6 +1325,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
if (!tp_head) {
RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
+ /* Wait for flying RCU callback before it is freed. */
+ rcu_barrier_bh();
return;
}
@@ -1052,7 +1342,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
rcu_assign_pointer(*miniqp->p_miniq, miniq);
if (miniq_old)
- /* This is counterpart of the rcu barrier above. We need to
+ /* This is counterpart of the rcu barriers above. We need to
* block potential new user of miniq_old until all readers
* are not seeing it.
*/
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 17c7130454bd..cbe4831f46f4 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -306,12 +306,13 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
struct tc_gred_sopt *sopt;
int i;
- if (dps == NULL)
+ if (!dps)
return -EINVAL;
sopt = nla_data(dps);
- if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
+ if (sopt->DPs > MAX_DPs || sopt->DPs == 0 ||
+ sopt->def_DP >= sopt->DPs)
return -EINVAL;
sch_tree_lock(sch);
@@ -356,6 +357,9 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
struct gred_sched *table = qdisc_priv(sch);
struct gred_sched_data *q = table->tab[dp];
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ return -EINVAL;
+
if (!q) {
table->tab[dp] = q = *prealloc;
*prealloc = NULL;
@@ -388,7 +392,8 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
[TCA_GRED_LIMIT] = { .type = NLA_U32 },
};
-static int gred_change(struct Qdisc *sch, struct nlattr *opt)
+static int gred_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct gred_sched *table = qdisc_priv(sch);
struct tc_gred_qopt *ctl;
@@ -462,12 +467,13 @@ errout:
return err;
}
-static int gred_init(struct Qdisc *sch, struct nlattr *opt)
+static int gred_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_GRED_MAX + 1];
int err;
- if (opt == NULL)
+ if (!opt)
return -EINVAL;
err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d04068a97d81..3ae9877ea205 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -921,7 +921,8 @@ static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = {
static int
hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- struct nlattr **tca, unsigned long *arg)
+ struct nlattr **tca, unsigned long *arg,
+ struct netlink_ext_ack *extack)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl = (struct hfsc_class *)*arg;
@@ -1033,7 +1034,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
- err = tcf_block_get(&cl->block, &cl->filter_list, sch);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
return err;
@@ -1061,8 +1062,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->cl_common.classid = classid;
cl->sched = q;
cl->cl_parent = parent;
- cl->qdisc = qdisc_create_dflt(sch->dev_queue,
- &pfifo_qdisc_ops, classid);
+ cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ classid, NULL);
if (cl->qdisc == NULL)
cl->qdisc = &noop_qdisc;
else
@@ -1176,7 +1177,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
static int
hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old)
+ struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct hfsc_class *cl = (struct hfsc_class *)arg;
@@ -1184,7 +1185,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
return -EINVAL;
if (new == NULL) {
new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- cl->cl_common.classid);
+ cl->cl_common.classid, NULL);
if (new == NULL)
new = &noop_qdisc;
}
@@ -1246,7 +1247,8 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
cl->filter_cnt--;
}
-static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg)
+static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg,
+ struct netlink_ext_ack *extack)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl = (struct hfsc_class *)arg;
@@ -1388,7 +1390,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
}
static int
-hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
+hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct tc_hfsc_qopt *qopt;
@@ -1396,7 +1399,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch);
- if (opt == NULL || nla_len(opt) < sizeof(*qopt))
+ if (!opt || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
qopt = nla_data(opt);
@@ -1406,14 +1409,14 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
return err;
q->eligible = RB_ROOT;
- err = tcf_block_get(&q->root.block, &q->root.filter_list, sch);
+ err = tcf_block_get(&q->root.block, &q->root.filter_list, sch, extack);
if (err)
return err;
q->root.cl_common.classid = sch->handle;
q->root.sched = q;
q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- sch->handle);
+ sch->handle, NULL);
if (q->root.qdisc == NULL)
q->root.qdisc = &noop_qdisc;
else
@@ -1429,7 +1432,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
}
static int
-hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt)
+hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct tc_hfsc_qopt *qopt;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 73a53c08091b..bce2632212d3 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -504,7 +504,8 @@ static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = {
[TCA_HHF_NON_HH_WEIGHT] = { .type = NLA_U32 },
};
-static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
+static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct hhf_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_HHF_MAX + 1];
@@ -571,7 +572,8 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
return 0;
}
-static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
+static int hhf_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct hhf_sched_data *q = qdisc_priv(sch);
int i;
@@ -589,7 +591,7 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
q->hhf_non_hh_weight = 2;
if (opt) {
- int err = hhf_change(sch, opt);
+ int err = hhf_change(sch, opt, extack);
if (err)
return err;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index fa0380730ff0..1ea9846cc6ce 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1017,7 +1017,8 @@ static void htb_work_func(struct work_struct *work)
rcu_read_unlock();
}
-static int htb_init(struct Qdisc *sch, struct nlattr *opt)
+static int htb_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct htb_sched *q = qdisc_priv(sch);
struct nlattr *tb[TCA_HTB_MAX + 1];
@@ -1031,7 +1032,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = tcf_block_get(&q->block, &q->filter_list, sch);
+ err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
if (err)
return err;
@@ -1171,7 +1172,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
}
static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old)
+ struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct htb_class *cl = (struct htb_class *)arg;
@@ -1179,7 +1180,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
return -EINVAL;
if (new == NULL &&
(new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- cl->common.classid)) == NULL)
+ cl->common.classid, extack)) == NULL)
return -ENOBUFS;
*old = qdisc_replace(sch, new, &cl->un.leaf.q);
@@ -1289,7 +1290,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
if (!cl->level && htb_parent_last_child(cl)) {
new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- cl->parent->common.classid);
+ cl->parent->common.classid,
+ NULL);
last_child = 1;
}
@@ -1326,7 +1328,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
static int htb_change_class(struct Qdisc *sch, u32 classid,
u32 parentid, struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, struct netlink_ext_ack *extack)
{
int err = -EINVAL;
struct htb_sched *q = qdisc_priv(sch);
@@ -1356,10 +1358,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* Keeping backward compatible with rate_table based iproute2 tc */
if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
- qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]));
+ qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
+ NULL));
if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
- qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]));
+ qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
+ NULL));
if (!cl) { /* new class */
struct Qdisc *new_q;
@@ -1394,7 +1398,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!cl)
goto failure;
- err = tcf_block_get(&cl->block, &cl->filter_list, sch);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
goto failure;
@@ -1423,8 +1427,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
* so that can't be used inside of sch_tree_lock
* -- thanks to Karlis Peisenieks
*/
- new_q = qdisc_create_dflt(sch->dev_queue,
- &pfifo_qdisc_ops, classid);
+ new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ classid, NULL);
sch_tree_lock(sch);
if (parent && !parent->level) {
unsigned int qlen = parent->un.leaf.q->q.qlen;
@@ -1524,7 +1528,8 @@ failure:
return err;
}
-static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg)
+static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
+ struct netlink_ext_ack *extack)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 5ecc38f35d47..ce3f55259d0d 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -48,7 +48,8 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
}
-static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl)
+static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
struct ingress_sched_data *q = qdisc_priv(sch);
@@ -60,13 +61,29 @@ static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv)
struct mini_Qdisc_pair *miniqp = priv;
mini_qdisc_pair_swap(miniqp, tp_head);
+};
+
+static void ingress_ingress_block_set(struct Qdisc *sch, u32 block_index)
+{
+ struct ingress_sched_data *q = qdisc_priv(sch);
+
+ q->block_info.block_index = block_index;
}
-static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
+static u32 ingress_ingress_block_get(struct Qdisc *sch)
+{
+ struct ingress_sched_data *q = qdisc_priv(sch);
+
+ return q->block_info.block_index;
+}
+
+static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- int err;
+
+ net_inc_ingress_queue();
mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
@@ -74,14 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
q->block_info.chain_head_change = clsact_chain_head_change;
q->block_info.chain_head_change_priv = &q->miniqp;
- err = tcf_block_get_ext(&q->block, sch, &q->block_info);
- if (err)
- return err;
-
- net_inc_ingress_queue();
- sch->flags |= TCQ_F_CPUSTATS;
-
- return 0;
+ return tcf_block_get_ext(&q->block, sch, &q->block_info, extack);
}
static void ingress_destroy(struct Qdisc *sch)
@@ -117,13 +127,16 @@ static const struct Qdisc_class_ops ingress_class_ops = {
};
static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
- .cl_ops = &ingress_class_ops,
- .id = "ingress",
- .priv_size = sizeof(struct ingress_sched_data),
- .init = ingress_init,
- .destroy = ingress_destroy,
- .dump = ingress_dump,
- .owner = THIS_MODULE,
+ .cl_ops = &ingress_class_ops,
+ .id = "ingress",
+ .priv_size = sizeof(struct ingress_sched_data),
+ .static_flags = TCQ_F_CPUSTATS,
+ .init = ingress_init,
+ .destroy = ingress_destroy,
+ .dump = ingress_dump,
+ .ingress_block_set = ingress_ingress_block_set,
+ .ingress_block_get = ingress_ingress_block_get,
+ .owner = THIS_MODULE,
};
struct clsact_sched_data {
@@ -152,7 +165,8 @@ static unsigned long clsact_bind_filter(struct Qdisc *sch,
return clsact_find(sch, classid);
}
-static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl)
+static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
struct clsact_sched_data *q = qdisc_priv(sch);
@@ -166,19 +180,52 @@ static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl)
}
}
-static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
+static void clsact_ingress_block_set(struct Qdisc *sch, u32 block_index)
+{
+ struct clsact_sched_data *q = qdisc_priv(sch);
+
+ q->ingress_block_info.block_index = block_index;
+}
+
+static void clsact_egress_block_set(struct Qdisc *sch, u32 block_index)
+{
+ struct clsact_sched_data *q = qdisc_priv(sch);
+
+ q->egress_block_info.block_index = block_index;
+}
+
+static u32 clsact_ingress_block_get(struct Qdisc *sch)
+{
+ struct clsact_sched_data *q = qdisc_priv(sch);
+
+ return q->ingress_block_info.block_index;
+}
+
+static u32 clsact_egress_block_get(struct Qdisc *sch)
+{
+ struct clsact_sched_data *q = qdisc_priv(sch);
+
+ return q->egress_block_info.block_index;
+}
+
+static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct clsact_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
int err;
+ net_inc_ingress_queue();
+ net_inc_egress_queue();
+
mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
q->ingress_block_info.chain_head_change = clsact_chain_head_change;
q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress;
- err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info);
+ err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info,
+ extack);
if (err)
return err;
@@ -188,20 +235,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
q->egress_block_info.chain_head_change = clsact_chain_head_change;
q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
- err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
- if (err)
- goto err_egress_block_get;
-
- net_inc_ingress_queue();
- net_inc_egress_queue();
-
- sch->flags |= TCQ_F_CPUSTATS;
-
- return 0;
-
-err_egress_block_get:
- tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
- return err;
+ return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info, extack);
}
static void clsact_destroy(struct Qdisc *sch)
@@ -225,13 +259,18 @@ static const struct Qdisc_class_ops clsact_class_ops = {
};
static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
- .cl_ops = &clsact_class_ops,
- .id = "clsact",
- .priv_size = sizeof(struct clsact_sched_data),
- .init = clsact_init,
- .destroy = clsact_destroy,
- .dump = ingress_dump,
- .owner = THIS_MODULE,
+ .cl_ops = &clsact_class_ops,
+ .id = "clsact",
+ .priv_size = sizeof(struct clsact_sched_data),
+ .static_flags = TCQ_F_CPUSTATS,
+ .init = clsact_init,
+ .destroy = clsact_destroy,
+ .dump = ingress_dump,
+ .ingress_block_set = clsact_ingress_block_set,
+ .egress_block_set = clsact_egress_block_set,
+ .ingress_block_get = clsact_ingress_block_get,
+ .egress_block_get = clsact_egress_block_get,
+ .owner = THIS_MODULE,
};
static int __init ingress_module_init(void)
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 213b586a06a0..f062a18e9162 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -17,6 +17,7 @@
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
struct mq_sched {
struct Qdisc **qdiscs;
@@ -35,7 +36,8 @@ static void mq_destroy(struct Qdisc *sch)
kfree(priv->qdiscs);
}
-static int mq_init(struct Qdisc *sch, struct nlattr *opt)
+static int mq_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
@@ -59,7 +61,8 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
dev_queue = netdev_get_tx_queue(dev, ntx);
qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx),
TC_H_MAKE(TC_H_MAJ(sch->handle),
- TC_H_MIN(ntx + 1)));
+ TC_H_MIN(ntx + 1)),
+ extack);
if (!qdisc)
return -ENOMEM;
priv->qdiscs[ntx] = qdisc;
@@ -97,23 +100,42 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
struct net_device *dev = qdisc_dev(sch);
struct Qdisc *qdisc;
unsigned int ntx;
+ __u32 qlen = 0;
sch->q.qlen = 0;
memset(&sch->bstats, 0, sizeof(sch->bstats));
memset(&sch->qstats, 0, sizeof(sch->qstats));
+ /* MQ supports lockless qdiscs. However, statistics accounting needs
+ * to account for all, none, or a mix of locked and unlocked child
+ * qdiscs. Percpu stats are added to counters in-band and locking
+ * qdisc totals are added at end.
+ */
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
spin_lock_bh(qdisc_lock(qdisc));
- sch->q.qlen += qdisc->q.qlen;
- sch->bstats.bytes += qdisc->bstats.bytes;
- sch->bstats.packets += qdisc->bstats.packets;
- sch->qstats.backlog += qdisc->qstats.backlog;
- sch->qstats.drops += qdisc->qstats.drops;
- sch->qstats.requeues += qdisc->qstats.requeues;
- sch->qstats.overlimits += qdisc->qstats.overlimits;
+
+ if (qdisc_is_percpu_stats(qdisc)) {
+ qlen = qdisc_qlen_sum(qdisc);
+ __gnet_stats_copy_basic(NULL, &sch->bstats,
+ qdisc->cpu_bstats,
+ &qdisc->bstats);
+ __gnet_stats_copy_queue(&sch->qstats,
+ qdisc->cpu_qstats,
+ &qdisc->qstats, qlen);
+ } else {
+ sch->q.qlen += qdisc->q.qlen;
+ sch->bstats.bytes += qdisc->bstats.bytes;
+ sch->bstats.packets += qdisc->bstats.packets;
+ sch->qstats.backlog += qdisc->qstats.backlog;
+ sch->qstats.drops += qdisc->qstats.drops;
+ sch->qstats.requeues += qdisc->qstats.requeues;
+ sch->qstats.overlimits += qdisc->qstats.overlimits;
+ }
+
spin_unlock_bh(qdisc_lock(qdisc));
}
+
return 0;
}
@@ -134,7 +156,7 @@ static struct netdev_queue *mq_select_queue(struct Qdisc *sch,
}
static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
- struct Qdisc **old)
+ struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
struct net_device *dev = qdisc_dev(sch);
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index b85885a9d8a1..0e9d761cdd80 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -132,7 +132,8 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
return 0;
}
-static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
+static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct net_device *dev = qdisc_dev(sch);
struct mqprio_sched *priv = qdisc_priv(sch);
@@ -229,7 +230,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
qdisc = qdisc_create_dflt(dev_queue,
get_default_qdisc_ops(dev, i),
TC_H_MAKE(TC_H_MAJ(sch->handle),
- TC_H_MIN(i + 1)));
+ TC_H_MIN(i + 1)), extack);
if (!qdisc)
return -ENOMEM;
@@ -319,7 +320,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
}
static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
- struct Qdisc **old)
+ struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct net_device *dev = qdisc_dev(sch);
struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
@@ -388,22 +389,40 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
struct tc_mqprio_qopt opt = { 0 };
struct Qdisc *qdisc;
- unsigned int i;
+ unsigned int ntx, tc;
sch->q.qlen = 0;
memset(&sch->bstats, 0, sizeof(sch->bstats));
memset(&sch->qstats, 0, sizeof(sch->qstats));
- for (i = 0; i < dev->num_tx_queues; i++) {
- qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
+ /* MQ supports lockless qdiscs. However, statistics accounting needs
+ * to account for all, none, or a mix of locked and unlocked child
+ * qdiscs. Percpu stats are added to counters in-band and locking
+ * qdisc totals are added at end.
+ */
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
spin_lock_bh(qdisc_lock(qdisc));
- sch->q.qlen += qdisc->q.qlen;
- sch->bstats.bytes += qdisc->bstats.bytes;
- sch->bstats.packets += qdisc->bstats.packets;
- sch->qstats.backlog += qdisc->qstats.backlog;
- sch->qstats.drops += qdisc->qstats.drops;
- sch->qstats.requeues += qdisc->qstats.requeues;
- sch->qstats.overlimits += qdisc->qstats.overlimits;
+
+ if (qdisc_is_percpu_stats(qdisc)) {
+ __u32 qlen = qdisc_qlen_sum(qdisc);
+
+ __gnet_stats_copy_basic(NULL, &sch->bstats,
+ qdisc->cpu_bstats,
+ &qdisc->bstats);
+ __gnet_stats_copy_queue(&sch->qstats,
+ qdisc->cpu_qstats,
+ &qdisc->qstats, qlen);
+ } else {
+ sch->q.qlen += qdisc->q.qlen;
+ sch->bstats.bytes += qdisc->bstats.bytes;
+ sch->bstats.packets += qdisc->bstats.packets;
+ sch->qstats.backlog += qdisc->qstats.backlog;
+ sch->qstats.drops += qdisc->qstats.drops;
+ sch->qstats.requeues += qdisc->qstats.requeues;
+ sch->qstats.overlimits += qdisc->qstats.overlimits;
+ }
+
spin_unlock_bh(qdisc_lock(qdisc));
}
@@ -411,9 +430,9 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
opt.hw = priv->hw_offload;
- for (i = 0; i < netdev_get_num_tc(dev); i++) {
- opt.count[i] = dev->tc_to_txq[i].count;
- opt.offset[i] = dev->tc_to_txq[i].offset;
+ for (tc = 0; tc < netdev_get_num_tc(dev); tc++) {
+ opt.count[tc] = dev->tc_to_txq[tc].count;
+ opt.offset[tc] = dev->tc_to_txq[tc].offset;
}
if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
@@ -495,7 +514,6 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
if (cl >= TC_H_MIN_PRIORITY) {
int i;
__u32 qlen = 0;
- struct Qdisc *qdisc;
struct gnet_stats_queue qstats = {0};
struct gnet_stats_basic_packed bstats = {0};
struct net_device *dev = qdisc_dev(sch);
@@ -511,18 +529,26 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
for (i = tc.offset; i < tc.offset + tc.count; i++) {
struct netdev_queue *q = netdev_get_tx_queue(dev, i);
+ struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
+ struct gnet_stats_queue __percpu *cpu_qstats = NULL;
- qdisc = rtnl_dereference(q->qdisc);
spin_lock_bh(qdisc_lock(qdisc));
- qlen += qdisc->q.qlen;
- bstats.bytes += qdisc->bstats.bytes;
- bstats.packets += qdisc->bstats.packets;
- qstats.backlog += qdisc->qstats.backlog;
- qstats.drops += qdisc->qstats.drops;
- qstats.requeues += qdisc->qstats.requeues;
- qstats.overlimits += qdisc->qstats.overlimits;
+ if (qdisc_is_percpu_stats(qdisc)) {
+ cpu_bstats = qdisc->cpu_bstats;
+ cpu_qstats = qdisc->cpu_qstats;
+ }
+
+ qlen = qdisc_qlen_sum(qdisc);
+ __gnet_stats_copy_basic(NULL, &sch->bstats,
+ cpu_bstats, &qdisc->bstats);
+ __gnet_stats_copy_queue(&sch->qstats,
+ cpu_qstats,
+ &qdisc->qstats,
+ qlen);
spin_unlock_bh(qdisc_lock(qdisc));
}
+
/* Reclaim root sleeping lock before completing stats */
if (d->lock)
spin_lock_bh(d->lock);
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 012216386c0b..1da7ea8de0ad 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -180,7 +180,8 @@ multiq_destroy(struct Qdisc *sch)
kfree(q->queues);
}
-static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
+static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct multiq_sched_data *q = qdisc_priv(sch);
struct tc_multiq_qopt *qopt;
@@ -215,7 +216,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
child = qdisc_create_dflt(sch->dev_queue,
&pfifo_qdisc_ops,
TC_H_MAKE(sch->handle,
- i + 1));
+ i + 1), extack);
if (child) {
sch_tree_lock(sch);
old = q->queues[i];
@@ -236,17 +237,18 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
return 0;
}
-static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
+static int multiq_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct multiq_sched_data *q = qdisc_priv(sch);
int i, err;
q->queues = NULL;
- if (opt == NULL)
+ if (!opt)
return -EINVAL;
- err = tcf_block_get(&q->block, &q->filter_list, sch);
+ err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
if (err)
return err;
@@ -258,7 +260,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
for (i = 0; i < q->max_bands; i++)
q->queues[i] = &noop_qdisc;
- return multiq_tune(sch, opt);
+ return multiq_tune(sch, opt, extack);
}
static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -281,7 +283,7 @@ nla_put_failure:
}
static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old)
+ struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct multiq_sched_data *q = qdisc_priv(sch);
unsigned long band = arg - 1;
@@ -369,7 +371,8 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
}
-static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl)
+static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
struct multiq_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index dd70924cbcdf..7bbc13b8ca47 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -893,7 +893,8 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
}
/* Parse netlink message to set options */
-static int netem_change(struct Qdisc *sch, struct nlattr *opt)
+static int netem_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct netem_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_NETEM_MAX + 1];
@@ -984,7 +985,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
return ret;
}
-static int netem_init(struct Qdisc *sch, struct nlattr *opt)
+static int netem_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct netem_sched_data *q = qdisc_priv(sch);
int ret;
@@ -995,7 +997,7 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
return -EINVAL;
q->loss_model = CLG_RANDOM;
- ret = netem_change(sch, opt);
+ ret = netem_change(sch, opt, extack);
if (ret)
pr_info("netem: change failed\n");
return ret;
@@ -1157,7 +1159,7 @@ static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
}
static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old)
+ struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct netem_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 776c694c77c7..18d30bb86881 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -181,7 +181,8 @@ static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
[TCA_PIE_BYTEMODE] = {.type = NLA_U32},
};
-static int pie_change(struct Qdisc *sch, struct nlattr *opt)
+static int pie_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct pie_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_PIE_MAX + 1];
@@ -439,7 +440,8 @@ static void pie_timer(struct timer_list *t)
}
-static int pie_init(struct Qdisc *sch, struct nlattr *opt)
+static int pie_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct pie_sched_data *q = qdisc_priv(sch);
@@ -451,7 +453,7 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt)
timer_setup(&q->adapt_timer, pie_timer, 0);
if (opt) {
- int err = pie_change(sch, opt);
+ int err = pie_change(sch, opt, extack);
if (err)
return err;
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index 1c6cbab3e7b9..5619d2eb17b6 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -123,7 +123,8 @@ static struct sk_buff *plug_dequeue(struct Qdisc *sch)
return qdisc_dequeue_head(sch);
}
-static int plug_init(struct Qdisc *sch, struct nlattr *opt)
+static int plug_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct plug_sched_data *q = qdisc_priv(sch);
@@ -158,7 +159,8 @@ static int plug_init(struct Qdisc *sch, struct nlattr *opt)
* command is received (just act as a pass-thru queue).
* TCQ_PLUG_LIMIT: Increase/decrease queue size
*/
-static int plug_change(struct Qdisc *sch, struct nlattr *opt)
+static int plug_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct plug_sched_data *q = qdisc_priv(sch);
struct tc_plug_qopt *msg;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2c79559a0d31..efbf51f35778 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -142,6 +142,31 @@ prio_reset(struct Qdisc *sch)
sch->q.qlen = 0;
}
+static int prio_offload(struct Qdisc *sch, bool enable)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_prio_qopt_offload opt = {
+ .handle = sch->handle,
+ .parent = sch->parent,
+ };
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
+ if (enable) {
+ opt.command = TC_PRIO_REPLACE;
+ opt.replace_params.bands = q->bands;
+ memcpy(&opt.replace_params.priomap, q->prio2band,
+ TC_PRIO_MAX + 1);
+ opt.replace_params.qstats = &sch->qstats;
+ } else {
+ opt.command = TC_PRIO_DESTROY;
+ }
+
+ return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, &opt);
+}
+
static void
prio_destroy(struct Qdisc *sch)
{
@@ -149,11 +174,13 @@ prio_destroy(struct Qdisc *sch)
struct prio_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
+ prio_offload(sch, false);
for (prio = 0; prio < q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
-static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
+static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct prio_sched_data *q = qdisc_priv(sch);
struct Qdisc *queues[TCQ_PRIO_BANDS];
@@ -175,7 +202,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
/* Before commit, make sure we can allocate all new qdiscs */
for (i = oldbands; i < qopt->bands; i++) {
queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- TC_H_MAKE(sch->handle, i + 1));
+ TC_H_MAKE(sch->handle, i + 1),
+ extack);
if (!queues[i]) {
while (i > oldbands)
qdisc_destroy(queues[--i]);
@@ -202,10 +230,12 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
}
sch_tree_unlock(sch);
+ prio_offload(sch, true);
return 0;
}
-static int prio_init(struct Qdisc *sch, struct nlattr *opt)
+static int prio_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct prio_sched_data *q = qdisc_priv(sch);
int err;
@@ -213,11 +243,42 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = tcf_block_get(&q->block, &q->filter_list, sch);
+ err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
if (err)
return err;
- return prio_tune(sch, opt);
+ return prio_tune(sch, opt, extack);
+}
+
+static int prio_dump_offload(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_prio_qopt_offload hw_stats = {
+ .command = TC_PRIO_STATS,
+ .handle = sch->handle,
+ .parent = sch->parent,
+ {
+ .stats = {
+ .bstats = &sch->bstats,
+ .qstats = &sch->qstats,
+ },
+ },
+ };
+ int err;
+
+ sch->flags &= ~TCQ_F_OFFLOADED;
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return 0;
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+ &hw_stats);
+ if (err == -EOPNOTSUPP)
+ return 0;
+
+ if (!err)
+ sch->flags |= TCQ_F_OFFLOADED;
+
+ return err;
}
static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -225,10 +286,15 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
struct prio_sched_data *q = qdisc_priv(sch);
unsigned char *b = skb_tail_pointer(skb);
struct tc_prio_qopt opt;
+ int err;
opt.bands = q->bands;
memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
+ err = prio_dump_offload(sch);
+ if (err)
+ goto nla_put_failure;
+
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -240,7 +306,7 @@ nla_put_failure:
}
static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old)
+ struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct prio_sched_data *q = qdisc_priv(sch);
unsigned long band = arg - 1;
@@ -327,7 +393,8 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
}
-static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl)
+static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
struct prio_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 6962b37a3ad3..bb1a9c11fc54 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -402,7 +402,8 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
}
static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- struct nlattr **tca, unsigned long *arg)
+ struct nlattr **tca, unsigned long *arg,
+ struct netlink_ext_ack *extack)
{
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl = (struct qfq_class *)*arg;
@@ -479,8 +480,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->common.classid = classid;
cl->deficit = lmax;
- cl->qdisc = qdisc_create_dflt(sch->dev_queue,
- &pfifo_qdisc_ops, classid);
+ cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ classid, NULL);
if (cl->qdisc == NULL)
cl->qdisc = &noop_qdisc;
@@ -564,7 +565,8 @@ static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
return (unsigned long)qfq_find_class(sch, classid);
}
-static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl)
+static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
struct qfq_sched *q = qdisc_priv(sch);
@@ -593,13 +595,14 @@ static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
}
static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
- struct Qdisc *new, struct Qdisc **old)
+ struct Qdisc *new, struct Qdisc **old,
+ struct netlink_ext_ack *extack)
{
struct qfq_class *cl = (struct qfq_class *)arg;
if (new == NULL) {
- new = qdisc_create_dflt(sch->dev_queue,
- &pfifo_qdisc_ops, cl->common.classid);
+ new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ cl->common.classid, NULL);
if (new == NULL)
new = &noop_qdisc;
}
@@ -1413,14 +1416,15 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
qfq_deactivate_class(q, cl);
}
-static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
+static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_group *grp;
int i, j, err;
u32 max_cl_shift, maxbudg_shift, max_classes;
- err = tcf_block_get(&q->block, &q->filter_list, sch);
+ err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
if (err)
return err;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7f8ea9e297c3..16644b3d2362 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -167,6 +167,7 @@ static int red_offload(struct Qdisc *sch, bool enable)
opt.set.max = q->parms.qth_max >> q->parms.Wlog;
opt.set.probability = q->parms.max_P;
opt.set.is_ecn = red_use_ecn(q);
+ opt.set.qstats = &sch->qstats;
} else {
opt.command = TC_RED_DESTROY;
}
@@ -189,7 +190,8 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
[TCA_RED_MAX_P] = { .type = NLA_U32 },
};
-static int red_change(struct Qdisc *sch, struct nlattr *opt)
+static int red_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct red_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_RED_MAX + 1];
@@ -212,9 +214,12 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
ctl = nla_data(tb[TCA_RED_PARMS]);
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ return -EINVAL;
if (ctl->limit > 0) {
- child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
+ child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
+ extack);
if (IS_ERR(child))
return PTR_ERR(child);
}
@@ -262,17 +267,18 @@ static inline void red_adaptative_timer(struct timer_list *t)
spin_unlock(root_lock);
}
-static int red_init(struct Qdisc *sch, struct nlattr *opt)
+static int red_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct red_sched_data *q = qdisc_priv(sch);
q->qdisc = &noop_qdisc;
q->sch = sch;
timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
- return red_change(sch, opt);
+ return red_change(sch, opt, extack);
}
-static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
+static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
struct net_device *dev = qdisc_dev(sch);
struct tc_red_qopt_offload hw_stats = {
@@ -286,7 +292,8 @@ static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
};
int err;
- opt->flags &= ~TC_RED_OFFLOADED;
+ sch->flags &= ~TCQ_F_OFFLOADED;
+
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return 0;
@@ -296,7 +303,7 @@ static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
return 0;
if (!err)
- opt->flags |= TC_RED_OFFLOADED;
+ sch->flags |= TCQ_F_OFFLOADED;
return err;
}
@@ -316,8 +323,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
};
int err;
- sch->qstats.backlog = q->qdisc->qstats.backlog;
- err = red_dump_offload(sch, &opt);
+ err = red_dump_offload_stats(sch, &opt);
if (err)
goto nla_put_failure;
@@ -338,32 +344,24 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct red_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- struct tc_red_xstats st = {
- .early = q->stats.prob_drop + q->stats.forced_drop,
- .pdrop = q->stats.pdrop,
- .other = q->stats.other,
- .marked = q->stats.prob_mark + q->stats.forced_mark,
- };
+ struct tc_red_xstats st = {0};
- if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) {
- struct red_stats hw_stats = {0};
+ if (sch->flags & TCQ_F_OFFLOADED) {
struct tc_red_qopt_offload hw_stats_request = {
.command = TC_RED_XSTATS,
.handle = sch->handle,
.parent = sch->parent,
{
- .xstats = &hw_stats,
+ .xstats = &q->stats,
},
};
- if (!dev->netdev_ops->ndo_setup_tc(dev,
- TC_SETUP_QDISC_RED,
- &hw_stats_request)) {
- st.early += hw_stats.prob_drop + hw_stats.forced_drop;
- st.pdrop += hw_stats.pdrop;
- st.other += hw_stats.other;
- st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
- }
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+ &hw_stats_request);
}
+ st.early = q->stats.prob_drop + q->stats.forced_drop;
+ st.pdrop = q->stats.pdrop;
+ st.other = q->stats.other;
+ st.marked = q->stats.prob_mark + q->stats.forced_mark;
return gnet_stats_copy_app(d, &st, sizeof(st));
}
@@ -379,7 +377,7 @@ static int red_dump_class(struct Qdisc *sch, unsigned long cl,
}
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old)
+ struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct red_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 0678debdd856..7cbdad8419b7 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -488,7 +488,8 @@ static const struct tc_sfb_qopt sfb_default_ops = {
.penalty_burst = 20,
};
-static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
+static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct sfb_sched_data *q = qdisc_priv(sch);
struct Qdisc *child;
@@ -512,7 +513,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
if (limit == 0)
limit = qdisc_dev(sch)->tx_queue_len;
- child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit);
+ child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit, extack);
if (IS_ERR(child))
return PTR_ERR(child);
@@ -549,17 +550,18 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
return 0;
}
-static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
+static int sfb_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct sfb_sched_data *q = qdisc_priv(sch);
int err;
- err = tcf_block_get(&q->block, &q->filter_list, sch);
+ err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
if (err)
return err;
q->qdisc = &noop_qdisc;
- return sfb_change(sch, opt);
+ return sfb_change(sch, opt, extack);
}
static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -615,7 +617,7 @@ static int sfb_dump_class(struct Qdisc *sch, unsigned long cl,
}
static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old)
+ struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct sfb_sched_data *q = qdisc_priv(sch);
@@ -643,7 +645,8 @@ static void sfb_unbind(struct Qdisc *sch, unsigned long arg)
}
static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- struct nlattr **tca, unsigned long *arg)
+ struct nlattr **tca, unsigned long *arg,
+ struct netlink_ext_ack *extack)
{
return -ENOSYS;
}
@@ -665,7 +668,8 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
}
}
-static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl)
+static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
struct sfb_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 890f4a4564e7..2f2678197760 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -639,6 +639,9 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
if (ctl->divisor &&
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL;
+ if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
+ ctl_v1->Wlog))
+ return -EINVAL;
if (ctl_v1 && ctl_v1->qth_min) {
p = kmalloc(sizeof(*p), GFP_KERNEL);
if (!p)
@@ -718,15 +721,17 @@ static void sfq_destroy(struct Qdisc *sch)
kfree(q->red_parms);
}
-static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
+static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct sfq_sched_data *q = qdisc_priv(sch);
int i;
int err;
+ q->sch = sch;
timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
- err = tcf_block_get(&q->block, &q->filter_list, sch);
+ err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
if (err)
return err;
@@ -832,7 +837,8 @@ static void sfq_unbind(struct Qdisc *q, unsigned long cl)
{
}
-static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl)
+static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
struct sfq_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 120f4f365967..229172d509cc 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -142,16 +142,6 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r,
return len;
}
-/*
- * Return length of individual segments of a gso packet,
- * including all headers (MAC, IP, TCP/UDP)
- */
-static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
-{
- unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
- return hdr_len + skb_gso_transport_seglen(skb);
-}
-
/* GSO packet is too big, segment it so that tbf can transmit
* each segment in time
*/
@@ -302,7 +292,8 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
[TCA_TBF_PBURST] = { .type = NLA_U32 },
};
-static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
+static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
int err;
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -326,11 +317,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
qopt = nla_data(tb[TCA_TBF_PARMS]);
if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
- tb[TCA_TBF_RTAB]));
+ tb[TCA_TBF_RTAB],
+ NULL));
if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
- tb[TCA_TBF_PTAB]));
+ tb[TCA_TBF_PTAB],
+ NULL));
buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
@@ -383,7 +376,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
if (err)
goto done;
} else if (qopt->limit > 0) {
- child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
+ child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
+ extack);
if (IS_ERR(child)) {
err = PTR_ERR(child);
goto done;
@@ -421,19 +415,20 @@ done:
return err;
}
-static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
+static int tbf_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct tbf_sched_data *q = qdisc_priv(sch);
qdisc_watchdog_init(&q->watchdog, sch);
q->qdisc = &noop_qdisc;
- if (opt == NULL)
+ if (!opt)
return -EINVAL;
q->t_c = ktime_get_ns();
- return tbf_change(sch, opt);
+ return tbf_change(sch, opt, extack);
}
static void tbf_destroy(struct Qdisc *sch)
@@ -494,7 +489,7 @@ static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
}
static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old)
+ struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct tbf_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 9fe6b427afed..93f04cf5cac1 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -167,7 +167,8 @@ teql_destroy(struct Qdisc *sch)
}
}
-static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
+static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct net_device *dev = qdisc_dev(sch);
struct teql_master *m = (struct teql_master *)sch->ops;
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index d9c04dc1b3f3..c740b189d4ba 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -37,18 +37,6 @@ menuconfig IP_SCTP
if IP_SCTP
-config NET_SCTPPROBE
- tristate "SCTP: Association probing"
- depends on PROC_FS && KPROBES
- ---help---
- This module allows for capturing the changes to SCTP association
- state in response to incoming packets. It is used for debugging
- SCTP congestion control algorithms. If you don't understand
- what was just said, you don't need it: say N.
-
- To compile this code as a module, choose M here: the
- module will be called sctp_probe.
-
config SCTP_DBG_OBJCNT
bool "SCTP: Debug object counts"
depends on PROC_FS
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 1ca84a288443..6776582ec449 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -4,7 +4,6 @@
#
obj-$(CONFIG_IP_SCTP) += sctp.o
-obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o
obj-$(CONFIG_INET_SCTP_DIAG) += sctp_diag.o
sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
@@ -14,9 +13,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o stream.o auth.o \
offload.o stream_sched.o stream_sched_prio.o \
- stream_sched_rr.o
-
-sctp_probe-y := probe.o
+ stream_sched_rr.o stream_interleave.o
sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
sctp-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 69394f4d6091..837806dd5799 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -861,7 +861,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
0, spc_state, error, GFP_ATOMIC);
if (event)
- sctp_ulpq_tail_event(&asoc->ulpq, event);
+ asoc->stream.si->enqueue_event(&asoc->ulpq, event);
}
/* Select new active and retran paths. */
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 7b261afc47b9..991a530c6b31 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -53,6 +53,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
msg->send_failed = 0;
msg->send_error = 0;
msg->can_delay = 1;
+ msg->abandoned = 0;
msg->expires_at = 0;
INIT_LIST_HEAD(&msg->chunks);
}
@@ -123,7 +124,7 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
ev = sctp_ulpevent_make_send_failed(asoc, chunk, sent,
error, GFP_ATOMIC);
if (ev)
- sctp_ulpq_tail_event(&asoc->ulpq, ev);
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
}
sctp_chunk_put(chunk);
@@ -190,7 +191,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
*/
max_data = asoc->pathmtu -
sctp_sk(asoc->base.sk)->pf->af->net_header_len -
- sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk);
+ sizeof(struct sctphdr) - sctp_datachk_len(&asoc->stream);
max_data = SCTP_TRUNC4(max_data);
/* If the the peer requested that we authenticate DATA chunks
@@ -263,8 +264,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
frag |= SCTP_DATA_SACK_IMM;
}
- chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag,
- 0, GFP_KERNEL);
+ chunk = asoc->stream.si->make_datafrag(asoc, sinfo, len, frag,
+ GFP_KERNEL);
if (!chunk) {
err = -ENOMEM;
goto errout;
@@ -304,6 +305,13 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
if (!chunk->asoc->peer.prsctp_capable)
return 0;
+ if (chunk->msg->abandoned)
+ return 1;
+
+ if (!chunk->has_tsn &&
+ !(chunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG))
+ return 0;
+
if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
time_after(jiffies, chunk->msg->expires_at)) {
struct sctp_stream_out *streamout =
@@ -316,6 +324,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
}
+ chunk->msg->abandoned = 1;
return 1;
} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
@@ -324,10 +333,12 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+ chunk->msg->abandoned = 1;
return 1;
} else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
chunk->msg->expires_at &&
time_after(jiffies, chunk->msg->expires_at)) {
+ chunk->msg->abandoned = 1;
return 1;
}
/* PRIO policy is processed by sendmsg, not here */
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 3f619fdcbf0a..291c97b07058 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
case SCTP_CID_AUTH:
return "AUTH";
+ case SCTP_CID_RECONF:
+ return "RECONF";
+
default:
break;
}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index ee1e601a0b11..8b3146816519 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -232,7 +232,7 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
{
ep->base.dead = true;
- ep->base.sk->sk_state = SCTP_SS_CLOSED;
+ inet_sk_set_state(ep->base.sk, SCTP_SS_CLOSED);
/* Unlink this endpoint, so we can't find it again! */
sctp_unhash_endpoint(ep);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 621b5ca3fd1c..141c9c466ec1 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
return;
}
- if (t->param_flags & SPP_PMTUD_ENABLE) {
- /* Update transports view of the MTU */
- sctp_transport_update_pmtu(t, pmtu);
-
- /* Update association pmtu. */
- sctp_assoc_sync_pmtu(asoc);
- }
+ if (!(t->param_flags & SPP_PMTUD_ENABLE))
+ /* We can't allow retransmitting in such case, as the
+ * retransmission would be sized just as before, and thus we
+ * would get another icmp, and retransmit again.
+ */
+ return;
- /* Retransmit with the new pmtu setting.
- * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
- * Needed will never be sent, but if a message was sent before
- * PMTU discovery was disabled that was larger than the PMTU, it
- * would not be fragmented, so it must be re-transmitted fragmented.
+ /* Update transports view of the MTU. Return if no update was needed.
+ * If an update wasn't needed/possible, it also doesn't make sense to
+ * try to retransmit now.
*/
+ if (!sctp_transport_update_pmtu(t, pmtu))
+ return;
+
+ /* Update association pmtu. */
+ sctp_assoc_sync_pmtu(asoc);
+
+ /* Retransmit with the new pmtu setting. */
sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 3b18085e3b10..5d4c15bf66d2 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -826,6 +826,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp)
case AF_INET:
if (!__ipv6_only_sock(sctp_opt2sk(sp)))
return 1;
+ /* fallthru */
default:
return 0;
}
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 275925b93b29..35bc7106d182 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -45,6 +45,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sctphdr *sh;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
+ goto out;
+
sh = sctp_hdr(skb);
if (!pskb_may_pull(skb, sizeof(*sh)))
goto out;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 4a865cd06d76..01a26ee051e3 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -313,6 +313,7 @@ static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
/* We believe that this chunk is OK to add to the packet */
switch (chunk->chunk_hdr->type) {
case SCTP_CID_DATA:
+ case SCTP_CID_I_DATA:
/* Account for the data being in the packet */
sctp_packet_append_data(packet, chunk);
/* Disallow SACK bundling after DATA. */
@@ -724,7 +725,7 @@ static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
* or delay in hopes of bundling a full sized packet.
*/
if (chunk->skb->len + q->out_qlen > transport->pathmtu -
- packet->overhead - sizeof(struct sctp_data_chunk) - 4)
+ packet->overhead - sctp_datachk_len(&chunk->asoc->stream) - 4)
/* Enough data queued to fill a packet */
return SCTP_XMIT_OK;
@@ -759,7 +760,7 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
asoc->peer.rwnd = rwnd;
sctp_chunk_assign_tsn(chunk);
- sctp_chunk_assign_ssn(chunk);
+ asoc->stream.si->assign_number(chunk);
}
static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 4db012aa25f7..f211b3db6a35 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -67,8 +67,6 @@ static void sctp_mark_missing(struct sctp_outq *q,
__u32 highest_new_tsn,
int count_of_newacks);
-static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
-
static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
/* Add data to the front of the queue. */
@@ -364,10 +362,12 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
struct sctp_stream_out *streamout;
- if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
- chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
+ if (!chk->msg->abandoned &&
+ (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+ chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
continue;
+ chk->msg->abandoned = 1;
list_del_init(&chk->transmitted_list);
sctp_insert_list(&asoc->outqueue.abandoned,
&chk->transmitted_list);
@@ -377,7 +377,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
- if (!chk->tsn_gap_acked) {
+ if (queue != &asoc->outqueue.retransmit &&
+ !chk->tsn_gap_acked) {
if (chk->transport)
chk->transport->flight_size -=
sctp_data_size(chk);
@@ -403,10 +404,13 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
q->sched->unsched_all(&asoc->stream);
list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
- if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
- chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
+ if (!chk->msg->abandoned &&
+ (!(chk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG) ||
+ !SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
+ chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive))
continue;
+ chk->msg->abandoned = 1;
sctp_sched_dequeue_common(q, chk);
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
@@ -585,7 +589,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
* following the procedures outlined in C1 - C5.
*/
if (reason == SCTP_RTXR_T3_RTX)
- sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point);
+ q->asoc->stream.si->generate_ftsn(q, q->asoc->ctsn_ack_point);
/* Flush the queues only on timeout, since fast_rtx is only
* triggered during sack processing and the queue
@@ -912,9 +916,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
break;
case SCTP_CID_ABORT:
- if (sctp_test_T_bit(chunk)) {
+ if (sctp_test_T_bit(chunk))
packet->vtag = asoc->c.my_vtag;
- }
+ /* fallthru */
/* The following chunks are "response" chunks, i.e.
* they are generated in response to something we
* received. If we are sending these, then we can
@@ -936,6 +940,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
case SCTP_CID_ECN_ECNE:
case SCTP_CID_ASCONF:
case SCTP_CID_FWD_TSN:
+ case SCTP_CID_I_FWD_TSN:
case SCTP_CID_RECONF:
status = sctp_packet_transmit_chunk(packet, chunk,
one_packet, gfp);
@@ -950,7 +955,8 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
* sender MUST assure that at least one T3-rtx
* timer is running.
*/
- if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) {
+ if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN ||
+ chunk->chunk_hdr->type == SCTP_CID_I_FWD_TSN) {
sctp_transport_reset_t3_rtx(transport);
transport->last_time_sent = jiffies;
}
@@ -1366,7 +1372,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
asoc->peer.rwnd = sack_a_rwnd;
- sctp_generate_fwdtsn(q, sack_ctsn);
+ asoc->stream.si->generate_ftsn(q, sack_ctsn);
pr_debug("%s: sack cumulative tsn ack:0x%x\n", __func__, sack_ctsn);
pr_debug("%s: cumulative tsn ack of assoc:%p is 0x%x, "
@@ -1434,7 +1440,8 @@ static void sctp_check_transmitted(struct sctp_outq *q,
/* If this chunk has not been acked, stop
* considering it as 'outstanding'.
*/
- if (!tchunk->tsn_gap_acked) {
+ if (transmitted_queue != &q->retransmit &&
+ !tchunk->tsn_gap_acked) {
if (tchunk->transport)
tchunk->transport->flight_size -=
sctp_data_size(tchunk);
@@ -1788,7 +1795,7 @@ static inline int sctp_get_skip_pos(struct sctp_fwdtsn_skip *skiplist,
}
/* Create and add a fwdtsn chunk to the outq's control queue if needed. */
-static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
+void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
{
struct sctp_association *asoc = q->asoc;
struct sctp_chunk *ftsn_chunk = NULL;
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
deleted file mode 100644
index 1280f85a598d..000000000000
--- a/net/sctp/probe.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * sctp_probe - Observe the SCTP flow with kprobes.
- *
- * The idea for this came from Werner Almesberger's umlsim
- * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
- *
- * Modified for SCTP from Stephen Hemminger's code
- * Copyright (C) 2010, Wei Yongjun <yjwei@cn.fujitsu.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/kprobes.h>
-#include <linux/socket.h>
-#include <linux/sctp.h>
-#include <linux/proc_fs.h>
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/kfifo.h>
-#include <linux/time.h>
-#include <net/net_namespace.h>
-
-#include <net/sctp/sctp.h>
-#include <net/sctp/sm.h>
-
-MODULE_SOFTDEP("pre: sctp");
-MODULE_AUTHOR("Wei Yongjun <yjwei@cn.fujitsu.com>");
-MODULE_DESCRIPTION("SCTP snooper");
-MODULE_LICENSE("GPL");
-
-static int port __read_mostly = 0;
-MODULE_PARM_DESC(port, "Port to match (0=all)");
-module_param(port, int, 0);
-
-static unsigned int fwmark __read_mostly = 0;
-MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
-module_param(fwmark, uint, 0);
-
-static int bufsize __read_mostly = 64 * 1024;
-MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
-module_param(bufsize, int, 0);
-
-static int full __read_mostly = 1;
-MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
-module_param(full, int, 0);
-
-static const char procname[] = "sctpprobe";
-
-static struct {
- struct kfifo fifo;
- spinlock_t lock;
- wait_queue_head_t wait;
- struct timespec64 tstart;
-} sctpw;
-
-static __printf(1, 2) void printl(const char *fmt, ...)
-{
- va_list args;
- int len;
- char tbuf[256];
-
- va_start(args, fmt);
- len = vscnprintf(tbuf, sizeof(tbuf), fmt, args);
- va_end(args);
-
- kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
- wake_up(&sctpw.wait);
-}
-
-static int sctpprobe_open(struct inode *inode, struct file *file)
-{
- kfifo_reset(&sctpw.fifo);
- ktime_get_ts64(&sctpw.tstart);
-
- return 0;
-}
-
-static ssize_t sctpprobe_read(struct file *file, char __user *buf,
- size_t len, loff_t *ppos)
-{
- int error = 0, cnt = 0;
- unsigned char *tbuf;
-
- if (!buf)
- return -EINVAL;
-
- if (len == 0)
- return 0;
-
- tbuf = vmalloc(len);
- if (!tbuf)
- return -ENOMEM;
-
- error = wait_event_interruptible(sctpw.wait,
- kfifo_len(&sctpw.fifo) != 0);
- if (error)
- goto out_free;
-
- cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
- error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
-
-out_free:
- vfree(tbuf);
-
- return error ? error : cnt;
-}
-
-static const struct file_operations sctpprobe_fops = {
- .owner = THIS_MODULE,
- .open = sctpprobe_open,
- .read = sctpprobe_read,
- .llseek = noop_llseek,
-};
-
-static enum sctp_disposition jsctp_sf_eat_sack(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const union sctp_subtype type,
- void *arg,
- struct sctp_cmd_seq *commands)
-{
- struct sctp_chunk *chunk = arg;
- struct sk_buff *skb = chunk->skb;
- struct sctp_transport *sp;
- static __u32 lcwnd = 0;
- struct timespec64 now;
-
- sp = asoc->peer.primary_path;
-
- if (((port == 0 && fwmark == 0) ||
- asoc->peer.port == port ||
- ep->base.bind_addr.port == port ||
- (fwmark > 0 && skb->mark == fwmark)) &&
- (full || sp->cwnd != lcwnd)) {
- lcwnd = sp->cwnd;
-
- ktime_get_ts64(&now);
- now = timespec64_sub(now, sctpw.tstart);
-
- printl("%lu.%06lu ", (unsigned long) now.tv_sec,
- (unsigned long) now.tv_nsec / NSEC_PER_USEC);
-
- printl("%p %5d %5d %5d %8d %5d ", asoc,
- ep->base.bind_addr.port, asoc->peer.port,
- asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data);
-
- list_for_each_entry(sp, &asoc->peer.transport_addr_list,
- transports) {
- if (sp == asoc->peer.primary_path)
- printl("*");
-
- printl("%pISc %2u %8u %8u %8u %8u %8u ",
- &sp->ipaddr, sp->state, sp->cwnd, sp->ssthresh,
- sp->flight_size, sp->partial_bytes_acked,
- sp->pathmtu);
- }
- printl("\n");
- }
-
- jprobe_return();
- return 0;
-}
-
-static struct jprobe sctp_recv_probe = {
- .kp = {
- .symbol_name = "sctp_sf_eat_sack_6_2",
- },
- .entry = jsctp_sf_eat_sack,
-};
-
-static __init int sctp_setup_jprobe(void)
-{
- int ret = register_jprobe(&sctp_recv_probe);
-
- if (ret) {
- if (request_module("sctp"))
- goto out;
- ret = register_jprobe(&sctp_recv_probe);
- }
-
-out:
- return ret;
-}
-
-static __init int sctpprobe_init(void)
-{
- int ret = -ENOMEM;
-
- /* Warning: if the function signature of sctp_sf_eat_sack_6_2,
- * has been changed, you also have to change the signature of
- * jsctp_sf_eat_sack, otherwise you end up right here!
- */
- BUILD_BUG_ON(__same_type(sctp_sf_eat_sack_6_2,
- jsctp_sf_eat_sack) == 0);
-
- init_waitqueue_head(&sctpw.wait);
- spin_lock_init(&sctpw.lock);
- if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL))
- return ret;
-
- if (!proc_create(procname, S_IRUSR, init_net.proc_net,
- &sctpprobe_fops))
- goto free_kfifo;
-
- ret = sctp_setup_jprobe();
- if (ret)
- goto remove_proc;
-
- pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n",
- port, fwmark, bufsize);
- return 0;
-
-remove_proc:
- remove_proc_entry(procname, init_net.proc_net);
-free_kfifo:
- kfifo_free(&sctpw.fifo);
- return ret;
-}
-
-static __exit void sctpprobe_exit(void)
-{
- kfifo_free(&sctpw.fifo);
- remove_proc_entry(procname, init_net.proc_net);
- unregister_jprobe(&sctp_recv_probe);
-}
-
-module_init(sctpprobe_init);
-module_exit(sctpprobe_exit);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 26b4be6b4172..537545ebcb0e 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -95,7 +95,6 @@ static int sctp_snmp_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations sctp_snmp_seq_fops = {
- .owner = THIS_MODULE,
.open = sctp_snmp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -288,12 +287,8 @@ struct sctp_ht_iter {
static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
{
struct sctp_ht_iter *iter = seq->private;
- int err = sctp_transport_walk_start(&iter->hti);
- if (err) {
- iter->start_fail = 1;
- return ERR_PTR(err);
- }
+ sctp_transport_walk_start(&iter->hti);
iter->start_fail = 0;
return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index f5172c21349b..6a38c2503649 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1499,6 +1499,7 @@ static __init int sctp_init(void)
INIT_LIST_HEAD(&sctp_address_families);
sctp_v4_pf_init();
sctp_v6_pf_init();
+ sctp_sched_ops_init();
status = register_pernet_subsys(&sctp_defaults_ops);
if (status)
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 9bf575f2e8ed..793b05ec692b 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -228,7 +228,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
struct sctp_inithdr init;
union sctp_params addrs;
struct sctp_sock *sp;
- __u8 extensions[4];
+ __u8 extensions[5];
size_t chunksize;
__be16 types[2];
int num_ext = 0;
@@ -278,6 +278,11 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
if (sp->adaptation_ind)
chunksize += sizeof(aiparam);
+ if (sp->strm_interleave) {
+ extensions[num_ext] = SCTP_CID_I_DATA;
+ num_ext += 1;
+ }
+
chunksize += vparam_len;
/* Account for AUTH related parameters */
@@ -392,7 +397,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
struct sctp_inithdr initack;
union sctp_params addrs;
struct sctp_sock *sp;
- __u8 extensions[4];
+ __u8 extensions[5];
size_t chunksize;
int num_ext = 0;
int cookie_len;
@@ -442,6 +447,11 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
if (sp->adaptation_ind)
chunksize += sizeof(aiparam);
+ if (asoc->intl_enable) {
+ extensions[num_ext] = SCTP_CID_I_DATA;
+ num_ext += 1;
+ }
+
if (asoc->peer.auth_capable) {
auth_random = (struct sctp_paramhdr *)asoc->c.auth_random;
chunksize += ntohs(auth_random->length);
@@ -711,38 +721,31 @@ nodata:
/* Make a DATA chunk for the given association from the provided
* parameters. However, do not populate the data payload.
*/
-struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
+struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc,
const struct sctp_sndrcvinfo *sinfo,
- int data_len, __u8 flags, __u16 ssn,
- gfp_t gfp)
+ int len, __u8 flags, gfp_t gfp)
{
struct sctp_chunk *retval;
struct sctp_datahdr dp;
- int chunk_len;
/* We assign the TSN as LATE as possible, not here when
* creating the chunk.
*/
- dp.tsn = 0;
+ memset(&dp, 0, sizeof(dp));
+ dp.ppid = sinfo->sinfo_ppid;
dp.stream = htons(sinfo->sinfo_stream);
- dp.ppid = sinfo->sinfo_ppid;
/* Set the flags for an unordered send. */
- if (sinfo->sinfo_flags & SCTP_UNORDERED) {
+ if (sinfo->sinfo_flags & SCTP_UNORDERED)
flags |= SCTP_DATA_UNORDERED;
- dp.ssn = 0;
- } else
- dp.ssn = htons(ssn);
- chunk_len = sizeof(dp) + data_len;
- retval = sctp_make_data(asoc, flags, chunk_len, gfp);
+ retval = sctp_make_data(asoc, flags, sizeof(dp) + len, gfp);
if (!retval)
- goto nodata;
+ return NULL;
retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
-nodata:
return retval;
}
@@ -1273,7 +1276,6 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
struct sctp_authhdr auth_hdr;
struct sctp_hmac *hmac_desc;
struct sctp_chunk *retval;
- __u8 *hmac;
/* Get the first hmac that the peer told us to use */
hmac_desc = sctp_auth_asoc_get_hmac(asoc);
@@ -1292,7 +1294,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr),
&auth_hdr);
- hmac = skb_put_zero(retval->skb, hmac_desc->hmac_len);
+ skb_put_zero(retval->skb, hmac_desc->hmac_len);
/* Adjust the chunk header to include the empty MAC */
retval->chunk_hdr->length =
@@ -1415,6 +1417,12 @@ static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen, gfp);
}
+struct sctp_chunk *sctp_make_idata(const struct sctp_association *asoc,
+ __u8 flags, int paylen, gfp_t gfp)
+{
+ return _sctp_make_chunk(asoc, SCTP_CID_I_DATA, flags, paylen, gfp);
+}
+
static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
__u8 type, __u8 flags, int paylen,
gfp_t gfp)
@@ -2032,6 +2040,10 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
if (net->sctp.addip_enable)
asoc->peer.asconf_capable = 1;
break;
+ case SCTP_CID_I_DATA:
+ if (sctp_sk(asoc->base.sk)->strm_interleave)
+ asoc->intl_enable = 1;
+ break;
default:
break;
}
@@ -3523,6 +3535,30 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
return retval;
}
+struct sctp_chunk *sctp_make_ifwdtsn(const struct sctp_association *asoc,
+ __u32 new_cum_tsn, size_t nstreams,
+ struct sctp_ifwdtsn_skip *skiplist)
+{
+ struct sctp_chunk *retval = NULL;
+ struct sctp_ifwdtsn_hdr ftsn_hdr;
+ size_t hint;
+
+ hint = (nstreams + 1) * sizeof(__u32);
+
+ retval = sctp_make_control(asoc, SCTP_CID_I_FWD_TSN, 0, hint,
+ GFP_ATOMIC);
+ if (!retval)
+ return NULL;
+
+ ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn);
+ retval->subh.ifwdtsn_hdr =
+ sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr);
+
+ sctp_addto_chunk(retval, nstreams * sizeof(skiplist[0]), skiplist);
+
+ return retval;
+}
+
/* RE-CONFIG 3.1 (RE-CONFIG chunk)
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index df94d77401e7..b71e7fb0a20a 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -632,7 +632,7 @@ static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands,
struct sctp_chunk *abort;
/* Cancel any partial delivery in progress. */
- sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+ asoc->stream.si->abort_pd(&asoc->ulpq, GFP_ATOMIC);
if (event_type == SCTP_EVENT_T_CHUNK && subtype.chunk == SCTP_CID_ABORT)
event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
@@ -878,12 +878,12 @@ static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds,
* successfully completed a connect() call.
*/
if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED))
- sk->sk_state = SCTP_SS_ESTABLISHED;
+ inet_sk_set_state(sk, SCTP_SS_ESTABLISHED);
/* Set the RCV_SHUTDOWN flag when a SHUTDOWN is received. */
if (sctp_state(asoc, SHUTDOWN_RECEIVED) &&
sctp_sstate(sk, ESTABLISHED)) {
- sk->sk_state = SCTP_SS_CLOSING;
+ inet_sk_set_state(sk, SCTP_SS_CLOSING);
sk->sk_shutdown |= RCV_SHUTDOWN;
}
}
@@ -972,7 +972,7 @@ static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds,
if (!ev)
return;
- sctp_ulpq_tail_event(&asoc->ulpq, ev);
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
switch (err_hdr->cause) {
case SCTP_ERROR_UNKNOWN_CHUNK:
@@ -1007,18 +1007,6 @@ static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds,
}
}
-/* Process variable FWDTSN chunk information. */
-static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
- struct sctp_chunk *chunk)
-{
- struct sctp_fwdtsn_skip *skip;
-
- /* Walk through all the skipped SSNs */
- sctp_walk_fwdtsn(skip, chunk) {
- sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
- }
-}
-
/* Helper function to remove the association non-primary peer
* transports.
*/
@@ -1058,7 +1046,7 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
asoc->c.sinit_max_instreams,
NULL, GFP_ATOMIC);
if (ev)
- sctp_ulpq_tail_event(&asoc->ulpq, ev);
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
}
/* Helper function to generate an adaptation indication event */
@@ -1070,7 +1058,7 @@ static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC);
if (ev)
- sctp_ulpq_tail_event(&asoc->ulpq, ev);
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
}
@@ -1368,18 +1356,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
break;
case SCTP_CMD_REPORT_FWDTSN:
- /* Move the Cumulattive TSN Ack ahead. */
- sctp_tsnmap_skip(&asoc->peer.tsn_map, cmd->obj.u32);
-
- /* purge the fragmentation queue */
- sctp_ulpq_reasm_flushtsn(&asoc->ulpq, cmd->obj.u32);
-
- /* Abort any in progress partial delivery. */
- sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+ asoc->stream.si->report_ftsn(&asoc->ulpq, cmd->obj.u32);
break;
case SCTP_CMD_PROCESS_FWDTSN:
- sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.chunk);
+ asoc->stream.si->handle_ftsn(&asoc->ulpq,
+ cmd->obj.chunk);
break;
case SCTP_CMD_GEN_SACK:
@@ -1483,8 +1465,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
pr_debug("%s: sm_sideff: chunk_up:%p, ulpq:%p\n",
__func__, cmd->obj.chunk, &asoc->ulpq);
- sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.chunk,
- GFP_ATOMIC);
+ asoc->stream.si->ulpevent_data(&asoc->ulpq,
+ cmd->obj.chunk,
+ GFP_ATOMIC);
break;
case SCTP_CMD_EVENT_ULP:
@@ -1492,7 +1475,8 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
pr_debug("%s: sm_sideff: event_up:%p, ulpq:%p\n",
__func__, cmd->obj.ulpevent, &asoc->ulpq);
- sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ulpevent);
+ asoc->stream.si->enqueue_event(&asoc->ulpq,
+ cmd->obj.ulpevent);
break;
case SCTP_CMD_REPLY:
@@ -1729,12 +1713,13 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
break;
case SCTP_CMD_PART_DELIVER:
- sctp_ulpq_partial_delivery(&asoc->ulpq, GFP_ATOMIC);
+ asoc->stream.si->start_pd(&asoc->ulpq, GFP_ATOMIC);
break;
case SCTP_CMD_RENEGE:
- sctp_ulpq_renege(&asoc->ulpq, cmd->obj.chunk,
- GFP_ATOMIC);
+ asoc->stream.si->renege_events(&asoc->ulpq,
+ cmd->obj.chunk,
+ GFP_ATOMIC);
break;
case SCTP_CMD_SETUP_T4:
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8f8ccded13e4..eb7905ffe5f2 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -59,6 +59,9 @@
#include <net/sctp/sm.h>
#include <net/sctp/structs.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/sctp.h>
+
static struct sctp_packet *sctp_abort_pkt_new(
struct net *net,
const struct sctp_endpoint *ep,
@@ -3013,7 +3016,7 @@ enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk)))
+ if (!sctp_chunk_length_valid(chunk, sctp_datachk_len(&asoc->stream)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -3034,7 +3037,7 @@ enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net,
case SCTP_IERROR_PROTO_VIOLATION:
return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
(u8 *)chunk->subh.data_hdr,
- sizeof(struct sctp_datahdr));
+ sctp_datahdr_len(&asoc->stream));
default:
BUG();
}
@@ -3133,7 +3136,7 @@ enum sctp_disposition sctp_sf_eat_data_fast_4_4(
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk)))
+ if (!sctp_chunk_length_valid(chunk, sctp_datachk_len(&asoc->stream)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -3150,7 +3153,7 @@ enum sctp_disposition sctp_sf_eat_data_fast_4_4(
case SCTP_IERROR_PROTO_VIOLATION:
return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
(u8 *)chunk->subh.data_hdr,
- sizeof(struct sctp_datahdr));
+ sctp_datahdr_len(&asoc->stream));
default:
BUG();
}
@@ -3219,6 +3222,8 @@ enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net,
struct sctp_sackhdr *sackh;
__u32 ctsn;
+ trace_sctp_probe(ep, asoc, chunk);
+
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -3957,7 +3962,6 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
{
struct sctp_fwdtsn_hdr *fwdtsn_hdr;
struct sctp_chunk *chunk = arg;
- struct sctp_fwdtsn_skip *skip;
__u16 len;
__u32 tsn;
@@ -3971,7 +3975,7 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the FORWARD_TSN chunk has valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk)))
+ if (!sctp_chunk_length_valid(chunk, sctp_ftsnchk_len(&asoc->stream)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -3990,14 +3994,11 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0)
goto discard_noforce;
- /* Silently discard the chunk if stream-id is not valid */
- sctp_walk_fwdtsn(skip, chunk) {
- if (ntohs(skip->stream) >= asoc->stream.incnt)
- goto discard_noforce;
- }
+ if (!asoc->stream.si->validate_ftsn(chunk))
+ goto discard_noforce;
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn));
- if (len > sizeof(struct sctp_fwdtsn_hdr))
+ if (len > sctp_ftsnhdr_len(&asoc->stream))
sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN,
SCTP_CHUNK(chunk));
@@ -4028,7 +4029,6 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
{
struct sctp_fwdtsn_hdr *fwdtsn_hdr;
struct sctp_chunk *chunk = arg;
- struct sctp_fwdtsn_skip *skip;
__u16 len;
__u32 tsn;
@@ -4042,7 +4042,7 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the FORWARD_TSN chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk)))
+ if (!sctp_chunk_length_valid(chunk, sctp_ftsnchk_len(&asoc->stream)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -4061,14 +4061,11 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0)
goto gen_shutdown;
- /* Silently discard the chunk if stream-id is not valid */
- sctp_walk_fwdtsn(skip, chunk) {
- if (ntohs(skip->stream) >= asoc->stream.incnt)
- goto gen_shutdown;
- }
+ if (!asoc->stream.si->validate_ftsn(chunk))
+ goto gen_shutdown;
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn));
- if (len > sizeof(struct sctp_fwdtsn_hdr))
+ if (len > sctp_ftsnhdr_len(&asoc->stream))
sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN,
SCTP_CHUNK(chunk));
@@ -6244,14 +6241,12 @@ static int sctp_eat_data(const struct sctp_association *asoc,
struct sctp_chunk *err;
enum sctp_verb deliver;
size_t datalen;
- u8 ordered = 0;
- u16 ssn, sid;
__u32 tsn;
int tmp;
data_hdr = (struct sctp_datahdr *)chunk->skb->data;
chunk->subh.data_hdr = data_hdr;
- skb_pull(chunk->skb, sizeof(*data_hdr));
+ skb_pull(chunk->skb, sctp_datahdr_len(&asoc->stream));
tsn = ntohl(data_hdr->tsn);
pr_debug("%s: TSN 0x%x\n", __func__, tsn);
@@ -6299,7 +6294,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* Actually, allow a little bit of overflow (up to a MTU).
*/
datalen = ntohs(chunk->chunk_hdr->length);
- datalen -= sizeof(struct sctp_data_chunk);
+ datalen -= sctp_datachk_len(&asoc->stream);
deliver = SCTP_CMD_CHUNK_ULP;
@@ -6394,7 +6389,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS);
if (chunk->asoc)
chunk->asoc->stats.iodchunks++;
- ordered = 1;
}
/* RFC 2960 6.5 Stream Identifier and Stream Sequence Number
@@ -6405,8 +6399,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* with cause set to "Invalid Stream Identifier" (See Section 3.3.10)
* and discard the DATA chunk.
*/
- sid = ntohs(data_hdr->stream);
- if (sid >= asoc->stream.incnt) {
+ if (ntohs(data_hdr->stream) >= asoc->stream.incnt) {
/* Mark tsn as received even though we drop it */
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
@@ -6427,8 +6420,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* SSN is smaller then the next expected one. If it is, it wrapped
* and is invalid.
*/
- ssn = ntohs(data_hdr->ssn);
- if (ordered && SSN_lt(ssn, sctp_ssn_peek(&asoc->stream, in, sid)))
+ if (!asoc->stream.si->validate_data(chunk))
return SCTP_IERROR_PROTO_VIOLATION;
/* Send the data up to the user. Note: Schedule the
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 79b6bee5b768..691d9dc620e3 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -985,11 +985,14 @@ static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
if (state > SCTP_STATE_MAX)
return &bug;
+ if (cid == SCTP_CID_I_DATA)
+ cid = SCTP_CID_DATA;
+
if (cid <= SCTP_CID_BASE_MAX)
return &chunk_event_table[cid][state];
if (net->sctp.prsctp_enable) {
- if (cid == SCTP_CID_FWD_TSN)
+ if (cid == SCTP_CID_FWD_TSN || cid == SCTP_CID_I_FWD_TSN)
return &prsctp_chunk_event_table[0][state];
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3204a9b29407..356e387f82e7 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -85,7 +85,7 @@
static int sctp_writeable(struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len, struct sock **orig_sk);
+ size_t msg_len);
static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -188,19 +188,35 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
list_for_each_entry(chunk, &t->transmitted, transmitted_list)
cb(chunk);
- list_for_each_entry(chunk, &q->retransmit, list)
+ list_for_each_entry(chunk, &q->retransmit, transmitted_list)
cb(chunk);
- list_for_each_entry(chunk, &q->sacked, list)
+ list_for_each_entry(chunk, &q->sacked, transmitted_list)
cb(chunk);
- list_for_each_entry(chunk, &q->abandoned, list)
+ list_for_each_entry(chunk, &q->abandoned, transmitted_list)
cb(chunk);
list_for_each_entry(chunk, &q->out_chunk_list, list)
cb(chunk);
}
+static void sctp_for_each_rx_skb(struct sctp_association *asoc, struct sock *sk,
+ void (*cb)(struct sk_buff *, struct sock *))
+
+{
+ struct sk_buff *skb, *tmp;
+
+ sctp_skb_for_each(skb, &asoc->ulpq.lobby, tmp)
+ cb(skb, sk);
+
+ sctp_skb_for_each(skb, &asoc->ulpq.reasm, tmp)
+ cb(skb, sk);
+
+ sctp_skb_for_each(skb, &asoc->ulpq.reasm_uo, tmp)
+ cb(skb, sk);
+}
+
/* Verify that this is a valid address. */
static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
int len)
@@ -335,16 +351,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
if (len < sizeof (struct sockaddr))
return NULL;
+ if (!opt->pf->af_supported(addr->sa.sa_family, opt))
+ return NULL;
+
/* V4 mapped address are really of AF_INET family */
if (addr->sa.sa_family == AF_INET6 &&
- ipv6_addr_v4mapped(&addr->v6.sin6_addr)) {
- if (!opt->pf->af_supported(AF_INET, opt))
- return NULL;
- } else {
- /* Does this PF support this AF? */
- if (!opt->pf->af_supported(addr->sa.sa_family, opt))
- return NULL;
- }
+ ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
+ !opt->pf->af_supported(AF_INET, opt))
+ return NULL;
/* If we get this far, af is valid. */
af = sctp_get_af_specific(addr->sa.sa_family);
@@ -970,13 +984,6 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw)
* This is used for tunneling the sctp_bindx() request through sctp_setsockopt()
* from userspace.
*
- * We don't use copy_from_user() for optimization: we first do the
- * sanity checks (buffer size -fast- and access check-healthy
- * pointer); if all of those succeed, then we can alloc the memory
- * (expensive operation) needed to copy the data to kernel. Then we do
- * the copying without checking the user space area
- * (__copy_from_user()).
- *
* On exit there is no need to do sockfd_put(), sys_setsockopt() does
* it.
*
@@ -1006,25 +1013,15 @@ static int sctp_setsockopt_bindx(struct sock *sk,
if (unlikely(addrs_size <= 0))
return -EINVAL;
- /* Check the user passed a healthy pointer. */
- if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size)))
- return -EFAULT;
-
- /* Alloc space for the address array in kernel memory. */
- kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN);
- if (unlikely(!kaddrs))
- return -ENOMEM;
-
- if (__copy_from_user(kaddrs, addrs, addrs_size)) {
- kfree(kaddrs);
- return -EFAULT;
- }
+ kaddrs = vmemdup_user(addrs, addrs_size);
+ if (unlikely(IS_ERR(kaddrs)))
+ return PTR_ERR(kaddrs);
/* Walk through the addrs buffer and count the number of addresses. */
addr_buf = kaddrs;
while (walk_size < addrs_size) {
if (walk_size + sizeof(sa_family_t) > addrs_size) {
- kfree(kaddrs);
+ kvfree(kaddrs);
return -EINVAL;
}
@@ -1035,7 +1032,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
* causes the address buffer to overflow return EINVAL.
*/
if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
- kfree(kaddrs);
+ kvfree(kaddrs);
return -EINVAL;
}
addrcnt++;
@@ -1065,7 +1062,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
}
out:
- kfree(kaddrs);
+ kvfree(kaddrs);
return err;
}
@@ -1323,13 +1320,6 @@ out_free:
* land and invoking either sctp_connectx(). This is used for tunneling
* the sctp_connectx() request through sctp_setsockopt() from userspace.
*
- * We don't use copy_from_user() for optimization: we first do the
- * sanity checks (buffer size -fast- and access check-healthy
- * pointer); if all of those succeed, then we can alloc the memory
- * (expensive operation) needed to copy the data to kernel. Then we do
- * the copying without checking the user space area
- * (__copy_from_user()).
- *
* On exit there is no need to do sockfd_put(), sys_setsockopt() does
* it.
*
@@ -1345,7 +1335,6 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
sctp_assoc_t *assoc_id)
{
struct sockaddr *kaddrs;
- gfp_t gfp = GFP_KERNEL;
int err = 0;
pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
@@ -1354,24 +1343,12 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
if (unlikely(addrs_size <= 0))
return -EINVAL;
- /* Check the user passed a healthy pointer. */
- if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size)))
- return -EFAULT;
+ kaddrs = vmemdup_user(addrs, addrs_size);
+ if (unlikely(IS_ERR(kaddrs)))
+ return PTR_ERR(kaddrs);
- /* Alloc space for the address array in kernel memory. */
- if (sk->sk_socket->file)
- gfp = GFP_USER | __GFP_NOWARN;
- kaddrs = kmalloc(addrs_size, gfp);
- if (unlikely(!kaddrs))
- return -ENOMEM;
-
- if (__copy_from_user(kaddrs, addrs, addrs_size)) {
- err = -EFAULT;
- } else {
- err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
- }
-
- kfree(kaddrs);
+ err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
+ kvfree(kaddrs);
return err;
}
@@ -1528,7 +1505,7 @@ static void sctp_close(struct sock *sk, long timeout)
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
sk->sk_shutdown = SHUTDOWN_MASK;
- sk->sk_state = SCTP_SS_CLOSING;
+ inet_sk_set_state(sk, SCTP_SS_CLOSING);
ep = sctp_sk(sk)->ep;
@@ -1554,6 +1531,7 @@ static void sctp_close(struct sock *sk, long timeout)
if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) ||
!skb_queue_empty(&asoc->ulpq.reasm) ||
+ !skb_queue_empty(&asoc->ulpq.reasm_uo) ||
(sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) {
struct sctp_chunk *chunk;
@@ -1883,8 +1861,14 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
*/
if (sinit) {
if (sinit->sinit_num_ostreams) {
- asoc->c.sinit_num_ostreams =
- sinit->sinit_num_ostreams;
+ __u16 outcnt = sinit->sinit_num_ostreams;
+
+ asoc->c.sinit_num_ostreams = outcnt;
+ /* outcnt has been changed, so re-init stream */
+ err = sctp_stream_init(&asoc->stream, outcnt, 0,
+ GFP_KERNEL);
+ if (err)
+ goto out_free;
}
if (sinit->sinit_max_instreams) {
asoc->c.sinit_max_instreams =
@@ -1971,7 +1955,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
/* sk can be changed by peel off when waiting for buf. */
- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
+ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
if (err) {
if (err == -ESRCH) {
/* asoc is already dead. */
@@ -2002,7 +1986,20 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
if (err < 0)
goto out_free;
- wait_connect = true;
+ /* If stream interleave is enabled, wait_connect has to be
+ * done earlier than data enqueue, as it needs to make data
+ * or idata according to asoc->intl_enable which is set
+ * after connection is done.
+ */
+ if (sctp_sk(asoc->base.sk)->strm_interleave) {
+ timeo = sock_sndtimeo(sk, 0);
+ err = sctp_wait_for_connect(asoc, &timeo);
+ if (err)
+ goto out_unlock;
+ } else {
+ wait_connect = true;
+ }
+
pr_debug("%s: we associated primitively\n", __func__);
}
@@ -2277,11 +2274,11 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
if (asoc && sctp_outq_is_empty(&asoc->outqueue)) {
event = sctp_ulpevent_make_sender_dry_event(asoc,
- GFP_ATOMIC);
+ GFP_USER | __GFP_NOWARN);
if (!event)
return -ENOMEM;
- sctp_ulpq_tail_event(&asoc->ulpq, event);
+ asoc->stream.si->enqueue_event(&asoc->ulpq, event);
}
}
@@ -3180,7 +3177,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
if (val == 0) {
val = asoc->pathmtu - sp->pf->af->net_header_len;
val -= sizeof(struct sctphdr) +
- sizeof(struct sctp_data_chunk);
+ sctp_datachk_len(&asoc->stream);
}
asoc->user_frag = val;
asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
@@ -3350,7 +3347,10 @@ static int sctp_setsockopt_fragment_interleave(struct sock *sk,
if (get_user(val, (int __user *)optval))
return -EFAULT;
- sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1;
+ sctp_sk(sk)->frag_interleave = !!val;
+
+ if (!sctp_sk(sk)->frag_interleave)
+ sctp_sk(sk)->strm_interleave = 0;
return 0;
}
@@ -3498,6 +3498,8 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
if (optlen < sizeof(struct sctp_hmacalgo))
return -EINVAL;
+ optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) +
+ SCTP_AUTH_NUM_HMACS * sizeof(u16));
hmacs = memdup_user(optval, optlen);
if (IS_ERR(hmacs))
@@ -3536,6 +3538,11 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
if (optlen <= sizeof(struct sctp_authkey))
return -EINVAL;
+ /* authkey->sca_keylength is u16, so optlen can't be bigger than
+ * this.
+ */
+ optlen = min_t(unsigned int, optlen, USHRT_MAX +
+ sizeof(struct sctp_authkey));
authkey = memdup_user(optval, optlen);
if (IS_ERR(authkey))
@@ -3891,13 +3898,20 @@ static int sctp_setsockopt_reset_streams(struct sock *sk,
struct sctp_association *asoc;
int retval = -EINVAL;
- if (optlen < sizeof(struct sctp_reset_streams))
+ if (optlen < sizeof(*params))
return -EINVAL;
+ /* srs_number_streams is u16, so optlen can't be bigger than this. */
+ optlen = min_t(unsigned int, optlen, USHRT_MAX +
+ sizeof(__u16) * sizeof(*params));
params = memdup_user(optval, optlen);
if (IS_ERR(params))
return PTR_ERR(params);
+ if (params->srs_number_streams * sizeof(__u16) >
+ optlen - sizeof(*params))
+ goto out;
+
asoc = sctp_id2assoc(sk, params->srs_assoc_id);
if (!asoc)
goto out;
@@ -4019,6 +4033,40 @@ out:
return retval;
}
+static int sctp_setsockopt_interleaving_supported(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_sock *sp = sctp_sk(sk);
+ struct net *net = sock_net(sk);
+ struct sctp_assoc_value params;
+ int retval = -EINVAL;
+
+ if (optlen < sizeof(params))
+ goto out;
+
+ optlen = sizeof(params);
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (params.assoc_id)
+ goto out;
+
+ if (!net->sctp.intl_enable || !sp->frag_interleave) {
+ retval = -EPERM;
+ goto out;
+ }
+
+ sp->strm_interleave = !!params.assoc_value;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4206,6 +4254,10 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_STREAM_SCHEDULER_VALUE:
retval = sctp_setsockopt_scheduler_value(sk, optval, optlen);
break;
+ case SCTP_INTERLEAVING_SUPPORTED:
+ retval = sctp_setsockopt_interleaving_supported(sk, optval,
+ optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -4494,7 +4546,7 @@ static int sctp_init_sock(struct sock *sk)
SCTP_DBG_OBJCNT_INC(sock);
local_bh_disable();
- percpu_counter_inc(&sctp_sockets_allocated);
+ sk_sockets_allocated_inc(sk);
sock_prot_inuse_add(net, sk->sk_prot, 1);
/* Nothing can fail after this block, otherwise
@@ -4538,7 +4590,7 @@ static void sctp_destroy_sock(struct sock *sk)
}
sctp_endpoint_free(sp->ep);
local_bh_disable();
- percpu_counter_dec(&sctp_sockets_allocated);
+ sk_sockets_allocated_dec(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
local_bh_enable();
}
@@ -4582,7 +4634,7 @@ static void sctp_shutdown(struct sock *sk, int how)
if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
struct sctp_association *asoc;
- sk->sk_state = SCTP_SS_CLOSING;
+ inet_sk_set_state(sk, SCTP_SS_CLOSING);
asoc = list_entry(ep->asocs.next,
struct sctp_association, asocs);
sctp_primitive_SHUTDOWN(net, asoc, NULL);
@@ -4676,20 +4728,11 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
EXPORT_SYMBOL_GPL(sctp_get_sctp_info);
/* use callback to avoid exporting the core structure */
-int sctp_transport_walk_start(struct rhashtable_iter *iter)
+void sctp_transport_walk_start(struct rhashtable_iter *iter)
{
- int err;
-
rhltable_walk_enter(&sctp_transport_hashtable, iter);
- err = rhashtable_walk_start(iter);
- if (err && err != -EAGAIN) {
- rhashtable_walk_stop(iter);
- rhashtable_walk_exit(iter);
- return err;
- }
-
- return 0;
+ rhashtable_walk_start(iter);
}
void sctp_transport_walk_stop(struct rhashtable_iter *iter)
@@ -4783,9 +4826,8 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
int ret;
again:
- ret = sctp_transport_walk_start(&hti);
- if (ret)
- return ret;
+ ret = 0;
+ sctp_transport_walk_start(&hti);
tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
@@ -5011,7 +5053,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv
len = sizeof(int);
if (put_user(len, optlen))
return -EFAULT;
- if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int)))
+ if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
return -EFAULT;
return 0;
}
@@ -5080,7 +5122,6 @@ static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *p
*newfile = sock_alloc_file(newsock, 0, NULL);
if (IS_ERR(*newfile)) {
put_unused_fd(retval);
- sock_release(newsock);
retval = PTR_ERR(*newfile);
*newfile = NULL;
return retval;
@@ -5642,6 +5683,9 @@ copy_getaddrs:
err = -EFAULT;
goto out;
}
+ /* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too,
+ * but we can't change it anymore.
+ */
if (put_user(bytes_copied, optlen))
err = -EFAULT;
out:
@@ -6078,7 +6122,7 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
params.assoc_id = 0;
} else if (len >= sizeof(struct sctp_assoc_value)) {
len = sizeof(struct sctp_assoc_value);
- if (copy_from_user(&params, optval, sizeof(params)))
+ if (copy_from_user(&params, optval, len))
return -EFAULT;
} else
return -EINVAL;
@@ -6248,7 +6292,9 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
if (len < sizeof(struct sctp_authkeyid))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid)))
+
+ len = sizeof(struct sctp_authkeyid);
+ if (copy_from_user(&val, optval, len))
return -EFAULT;
asoc = sctp_id2assoc(sk, val.scact_assoc_id);
@@ -6260,7 +6306,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
else
val.scact_keynumber = ep->active_key_id;
- len = sizeof(struct sctp_authkeyid);
if (put_user(len, optlen))
return -EFAULT;
if (copy_to_user(optval, &val, len))
@@ -6286,7 +6331,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
if (len < sizeof(struct sctp_authchunks))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+ if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
to = p->gauth_chunks;
@@ -6331,7 +6376,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
if (len < sizeof(struct sctp_authchunks))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+ if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
to = p->gauth_chunks;
@@ -6981,6 +7026,47 @@ out:
return retval;
}
+static int sctp_getsockopt_interleaving_supported(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ params.assoc_value = asoc->intl_enable;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ params.assoc_value = sp->strm_interleave;
+ } else {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -7171,6 +7257,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_scheduler_value(sk, len, optval,
optlen);
break;
+ case SCTP_INTERLEAVING_SUPPORTED:
+ retval = sctp_getsockopt_interleaving_supported(sk, len, optval,
+ optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -7405,13 +7495,13 @@ static int sctp_listen_start(struct sock *sk, int backlog)
* sockets.
*
*/
- sk->sk_state = SCTP_SS_LISTENING;
+ inet_sk_set_state(sk, SCTP_SS_LISTENING);
if (!ep->base.bind_addr.port) {
if (sctp_autobind(sk))
return -EAGAIN;
} else {
if (sctp_get_port(sk, inet_sk(sk)->inet_num)) {
- sk->sk_state = SCTP_SS_CLOSED;
+ inet_sk_set_state(sk, SCTP_SS_CLOSED);
return -EADDRINUSE;
}
}
@@ -7497,11 +7587,11 @@ out:
* here, again, by modeling the current TCP/UDP code. We don't have
* a good way to test with it yet.
*/
-unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
struct sctp_sock *sp = sctp_sk(sk);
- unsigned int mask;
+ __poll_t mask;
poll_wait(file, sk_sleep(sk), wait);
@@ -7999,12 +8089,12 @@ void sctp_sock_rfree(struct sk_buff *skb)
/* Helper function to wait for space in the sndbuf. */
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len, struct sock **orig_sk)
+ size_t msg_len)
{
struct sock *sk = asoc->base.sk;
- int err = 0;
long current_timeo = *timeo_p;
DEFINE_WAIT(wait);
+ int err = 0;
pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc,
*timeo_p, msg_len);
@@ -8033,17 +8123,13 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
release_sock(sk);
current_timeo = schedule_timeout(current_timeo);
lock_sock(sk);
- if (sk != asoc->base.sk) {
- release_sock(sk);
- sk = asoc->base.sk;
- lock_sock(sk);
- }
+ if (sk != asoc->base.sk)
+ goto do_error;
*timeo_p = current_timeo;
}
out:
- *orig_sk = sk;
finish_wait(&asoc->wait, &wait);
/* Release the association's refcnt. */
@@ -8408,11 +8494,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
}
- sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp)
- sctp_skb_set_owner_r_frag(skb, newsk);
-
- sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp)
- sctp_skb_set_owner_r_frag(skb, newsk);
+ sctp_for_each_rx_skb(assoc, newsk, sctp_skb_set_owner_r_frag);
/* Set the type of socket to indicate that it is peeled off from the
* original UDP-style socket or created with the accept() call on a
@@ -8438,10 +8520,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* is called, set RCV_SHUTDOWN flag.
*/
if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) {
- newsk->sk_state = SCTP_SS_CLOSED;
+ inet_sk_set_state(newsk, SCTP_SS_CLOSED);
newsk->sk_shutdown |= RCV_SHUTDOWN;
} else {
- newsk->sk_state = SCTP_SS_ESTABLISHED;
+ inet_sk_set_state(newsk, SCTP_SS_ESTABLISHED);
}
release_sock(newsk);
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index a11db21dc8a0..cedf672487f9 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -64,7 +64,7 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
*/
/* Mark as failed send. */
- sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM);
+ sctp_chunk_fail(ch, (__force __u32)SCTP_ERROR_INV_STRM);
if (asoc->peer.prsctp_capable &&
SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags))
asoc->sent_cnt_removable--;
@@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
sctp_stream_outq_migrate(stream, NULL, outcnt);
sched->sched_all(stream);
- i = sctp_stream_alloc_out(stream, outcnt, gfp);
- if (i)
- return i;
+ ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+ if (ret)
+ goto out;
stream->outcnt = outcnt;
for (i = 0; i < stream->outcnt; i++)
@@ -167,22 +167,21 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
sched->init(stream);
in:
+ sctp_stream_interleave_init(stream);
if (!incnt)
goto out;
- i = sctp_stream_alloc_in(stream, incnt, gfp);
- if (i) {
- ret = -ENOMEM;
- goto free;
+ ret = sctp_stream_alloc_in(stream, incnt, gfp);
+ if (ret) {
+ sched->free(stream);
+ kfree(stream->out);
+ stream->out = NULL;
+ stream->outcnt = 0;
+ goto out;
}
stream->incnt = incnt;
- goto out;
-free:
- sched->free(stream);
- kfree(stream->out);
- stream->out = NULL;
out:
return ret;
}
@@ -215,11 +214,13 @@ void sctp_stream_clear(struct sctp_stream *stream)
{
int i;
- for (i = 0; i < stream->outcnt; i++)
- stream->out[i].ssn = 0;
+ for (i = 0; i < stream->outcnt; i++) {
+ stream->out[i].mid = 0;
+ stream->out[i].mid_uo = 0;
+ }
for (i = 0; i < stream->incnt; i++)
- stream->in[i].ssn = 0;
+ stream->in[i].mid = 0;
}
void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
@@ -254,6 +255,30 @@ static int sctp_send_reconf(struct sctp_association *asoc,
return retval;
}
+static bool sctp_stream_outq_is_empty(struct sctp_stream *stream,
+ __u16 str_nums, __be16 *str_list)
+{
+ struct sctp_association *asoc;
+ __u16 i;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ if (!asoc->outqueue.out_qlen)
+ return true;
+
+ if (!str_nums)
+ return false;
+
+ for (i = 0; i < str_nums; i++) {
+ __u16 sid = ntohs(str_list[i]);
+
+ if (stream->out[sid].ext &&
+ !list_empty(&stream->out[sid].ext->outq))
+ return false;
+ }
+
+ return true;
+}
+
int sctp_send_reset_streams(struct sctp_association *asoc,
struct sctp_reset_streams *params)
{
@@ -317,6 +342,11 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
for (i = 0; i < str_nums; i++)
nstr_list[i] = htons(str_list[i]);
+ if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) {
+ retval = -EAGAIN;
+ goto out;
+ }
+
chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in);
kfree(nstr_list);
@@ -377,6 +407,9 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
if (asoc->strreset_outstanding)
return -EINPROGRESS;
+ if (!sctp_outq_is_empty(&asoc->outqueue))
+ return -EAGAIN;
+
chunk = sctp_make_strreset_tsnreq(asoc);
if (!chunk)
return -ENOMEM;
@@ -563,7 +596,7 @@ struct sctp_chunk *sctp_process_strreset_outreq(
flags = SCTP_STREAM_RESET_INCOMING_SSN;
}
- nums = (ntohs(param.p->length) - sizeof(*outreq)) / 2;
+ nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16);
if (nums) {
str_p = outreq->list_of_streams;
for (i = 0; i < nums; i++) {
@@ -574,10 +607,10 @@ struct sctp_chunk *sctp_process_strreset_outreq(
}
for (i = 0; i < nums; i++)
- stream->in[ntohs(str_p[i])].ssn = 0;
+ stream->in[ntohs(str_p[i])].mid = 0;
} else {
for (i = 0; i < stream->incnt; i++)
- stream->in[i].ssn = 0;
+ stream->in[i].mid = 0;
}
result = SCTP_STRRESET_PERFORMED;
@@ -627,7 +660,7 @@ struct sctp_chunk *sctp_process_strreset_inreq(
goto out;
}
- nums = (ntohs(param.p->length) - sizeof(*inreq)) / 2;
+ nums = (ntohs(param.p->length) - sizeof(*inreq)) / sizeof(__u16);
str_p = inreq->list_of_streams;
for (i = 0; i < nums; i++) {
if (ntohs(str_p[i]) >= stream->outcnt) {
@@ -636,6 +669,12 @@ struct sctp_chunk *sctp_process_strreset_inreq(
}
}
+ if (!sctp_stream_outq_is_empty(stream, nums, str_p)) {
+ result = SCTP_STRRESET_IN_PROGRESS;
+ asoc->strreset_inseq--;
+ goto err;
+ }
+
chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0);
if (!chunk)
goto out;
@@ -687,12 +726,18 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
i = asoc->strreset_inseq - request_seq - 1;
result = asoc->strreset_result[i];
if (result == SCTP_STRRESET_PERFORMED) {
- next_tsn = asoc->next_tsn;
+ next_tsn = asoc->ctsn_ack_point + 1;
init_tsn =
sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1;
}
goto err;
}
+
+ if (!sctp_outq_is_empty(&asoc->outqueue)) {
+ result = SCTP_STRRESET_IN_PROGRESS;
+ goto err;
+ }
+
asoc->strreset_inseq++;
if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ))
@@ -703,13 +748,13 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
goto out;
}
- /* G3: The same processing as though a SACK chunk with no gap report
- * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were
- * received MUST be performed.
+ /* G4: The same processing as though a FWD-TSN chunk (as defined in
+ * [RFC3758]) with all streams affected and a new cumulative TSN
+ * ACK of the Receiver's Next TSN minus 1 were received MUST be
+ * performed.
*/
max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map);
- sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen);
- sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+ asoc->stream.si->report_ftsn(&asoc->ulpq, max_tsn_seen);
/* G1: Compute an appropriate value for the Receiver's Next TSN -- the
* TSN that the peer should use to send the next DATA chunk. The
@@ -720,10 +765,9 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
init_tsn, GFP_ATOMIC);
- /* G4: The same processing as though a FWD-TSN chunk (as defined in
- * [RFC3758]) with all streams affected and a new cumulative TSN
- * ACK of the Receiver's Next TSN minus 1 were received MUST be
- * performed.
+ /* G3: The same processing as though a SACK chunk with no gap report
+ * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were
+ * received MUST be performed.
*/
sctp_outq_free(&asoc->outqueue);
@@ -739,10 +783,12 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
/* G5: The next expected and outgoing SSNs MUST be reset to 0 for all
* incoming and outgoing streams.
*/
- for (i = 0; i < stream->outcnt; i++)
- stream->out[i].ssn = 0;
+ for (i = 0; i < stream->outcnt; i++) {
+ stream->out[i].mid = 0;
+ stream->out[i].mid_uo = 0;
+ }
for (i = 0; i < stream->incnt; i++)
- stream->in[i].ssn = 0;
+ stream->in[i].mid = 0;
result = SCTP_STRRESET_PERFORMED;
@@ -927,15 +973,20 @@ struct sctp_chunk *sctp_process_strreset_resp(
outreq = (struct sctp_strreset_outreq *)req;
str_p = outreq->list_of_streams;
- nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2;
+ nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) /
+ sizeof(__u16);
if (result == SCTP_STRRESET_PERFORMED) {
if (nums) {
- for (i = 0; i < nums; i++)
- stream->out[ntohs(str_p[i])].ssn = 0;
+ for (i = 0; i < nums; i++) {
+ stream->out[ntohs(str_p[i])].mid = 0;
+ stream->out[ntohs(str_p[i])].mid_uo = 0;
+ }
} else {
- for (i = 0; i < stream->outcnt; i++)
- stream->out[i].ssn = 0;
+ for (i = 0; i < stream->outcnt; i++) {
+ stream->out[i].mid = 0;
+ stream->out[i].mid_uo = 0;
+ }
}
flags = SCTP_STREAM_RESET_OUTGOING_SSN;
@@ -956,7 +1007,8 @@ struct sctp_chunk *sctp_process_strreset_resp(
inreq = (struct sctp_strreset_inreq *)req;
str_p = inreq->list_of_streams;
- nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2;
+ nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) /
+ sizeof(__u16);
*evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
nums, str_p, GFP_ATOMIC);
@@ -975,24 +1027,32 @@ struct sctp_chunk *sctp_process_strreset_resp(
if (result == SCTP_STRRESET_PERFORMED) {
__u32 mtsn = sctp_tsnmap_get_max_tsn_seen(
&asoc->peer.tsn_map);
+ LIST_HEAD(temp);
- sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn);
- sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+ asoc->stream.si->report_ftsn(&asoc->ulpq, mtsn);
sctp_tsnmap_init(&asoc->peer.tsn_map,
SCTP_TSN_MAP_INITIAL,
stsn, GFP_ATOMIC);
+ /* Clean up sacked and abandoned queues only. As the
+ * out_chunk_list may not be empty, splice it to temp,
+ * then get it back after sctp_outq_free is done.
+ */
+ list_splice_init(&asoc->outqueue.out_chunk_list, &temp);
sctp_outq_free(&asoc->outqueue);
+ list_splice_init(&temp, &asoc->outqueue.out_chunk_list);
asoc->next_tsn = rtsn;
asoc->ctsn_ack_point = asoc->next_tsn - 1;
asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
- for (i = 0; i < stream->outcnt; i++)
- stream->out[i].ssn = 0;
+ for (i = 0; i < stream->outcnt; i++) {
+ stream->out[i].mid = 0;
+ stream->out[i].mid_uo = 0;
+ }
for (i = 0; i < stream->incnt; i++)
- stream->in[i].ssn = 0;
+ stream->in[i].mid = 0;
}
for (i = 0; i < stream->outcnt; i++)
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
new file mode 100644
index 000000000000..8c7cf8f08711
--- /dev/null
+++ b/net/sctp/stream_interleave.c
@@ -0,0 +1,1334 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Xin Long <lucien.xin@gmail.com>
+ */
+
+#include <net/busy_poll.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/ulpevent.h>
+#include <linux/sctp.h>
+
+static struct sctp_chunk *sctp_make_idatafrag_empty(
+ const struct sctp_association *asoc,
+ const struct sctp_sndrcvinfo *sinfo,
+ int len, __u8 flags, gfp_t gfp)
+{
+ struct sctp_chunk *retval;
+ struct sctp_idatahdr dp;
+
+ memset(&dp, 0, sizeof(dp));
+ dp.stream = htons(sinfo->sinfo_stream);
+
+ if (sinfo->sinfo_flags & SCTP_UNORDERED)
+ flags |= SCTP_DATA_UNORDERED;
+
+ retval = sctp_make_idata(asoc, flags, sizeof(dp) + len, gfp);
+ if (!retval)
+ return NULL;
+
+ retval->subh.idata_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
+ memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
+
+ return retval;
+}
+
+static void sctp_chunk_assign_mid(struct sctp_chunk *chunk)
+{
+ struct sctp_stream *stream;
+ struct sctp_chunk *lchunk;
+ __u32 cfsn = 0;
+ __u16 sid;
+
+ if (chunk->has_mid)
+ return;
+
+ sid = sctp_chunk_stream_no(chunk);
+ stream = &chunk->asoc->stream;
+
+ list_for_each_entry(lchunk, &chunk->msg->chunks, frag_list) {
+ struct sctp_idatahdr *hdr;
+ __u32 mid;
+
+ lchunk->has_mid = 1;
+
+ hdr = lchunk->subh.idata_hdr;
+
+ if (lchunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG)
+ hdr->ppid = lchunk->sinfo.sinfo_ppid;
+ else
+ hdr->fsn = htonl(cfsn++);
+
+ if (lchunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) {
+ mid = lchunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG ?
+ sctp_mid_uo_next(stream, out, sid) :
+ sctp_mid_uo_peek(stream, out, sid);
+ } else {
+ mid = lchunk->chunk_hdr->flags & SCTP_DATA_LAST_FRAG ?
+ sctp_mid_next(stream, out, sid) :
+ sctp_mid_peek(stream, out, sid);
+ }
+ hdr->mid = htonl(mid);
+ }
+}
+
+static bool sctp_validate_data(struct sctp_chunk *chunk)
+{
+ const struct sctp_stream *stream;
+ __u16 sid, ssn;
+
+ if (chunk->chunk_hdr->type != SCTP_CID_DATA)
+ return false;
+
+ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+ return true;
+
+ stream = &chunk->asoc->stream;
+ sid = sctp_chunk_stream_no(chunk);
+ ssn = ntohs(chunk->subh.data_hdr->ssn);
+
+ return !SSN_lt(ssn, sctp_ssn_peek(stream, in, sid));
+}
+
+static bool sctp_validate_idata(struct sctp_chunk *chunk)
+{
+ struct sctp_stream *stream;
+ __u32 mid;
+ __u16 sid;
+
+ if (chunk->chunk_hdr->type != SCTP_CID_I_DATA)
+ return false;
+
+ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+ return true;
+
+ stream = &chunk->asoc->stream;
+ sid = sctp_chunk_stream_no(chunk);
+ mid = ntohl(chunk->subh.idata_hdr->mid);
+
+ return !MID_lt(mid, sctp_mid_peek(stream, in, sid));
+}
+
+static void sctp_intl_store_reasm(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_ulpevent *cevent;
+ struct sk_buff *pos;
+
+ pos = skb_peek_tail(&ulpq->reasm);
+ if (!pos) {
+ __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
+ return;
+ }
+
+ cevent = sctp_skb2event(pos);
+
+ if (event->stream == cevent->stream &&
+ event->mid == cevent->mid &&
+ (cevent->msg_flags & SCTP_DATA_FIRST_FRAG ||
+ (!(event->msg_flags & SCTP_DATA_FIRST_FRAG) &&
+ event->fsn > cevent->fsn))) {
+ __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
+ return;
+ }
+
+ if ((event->stream == cevent->stream &&
+ MID_lt(cevent->mid, event->mid)) ||
+ event->stream > cevent->stream) {
+ __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
+ return;
+ }
+
+ skb_queue_walk(&ulpq->reasm, pos) {
+ cevent = sctp_skb2event(pos);
+
+ if (event->stream < cevent->stream ||
+ (event->stream == cevent->stream &&
+ MID_lt(event->mid, cevent->mid)))
+ break;
+
+ if (event->stream == cevent->stream &&
+ event->mid == cevent->mid &&
+ !(cevent->msg_flags & SCTP_DATA_FIRST_FRAG) &&
+ (event->msg_flags & SCTP_DATA_FIRST_FRAG ||
+ event->fsn < cevent->fsn))
+ break;
+ }
+
+ __skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event));
+}
+
+static struct sctp_ulpevent *sctp_intl_retrieve_partial(
+ struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sk_buff *first_frag = NULL;
+ struct sk_buff *last_frag = NULL;
+ struct sctp_ulpevent *retval;
+ struct sctp_stream_in *sin;
+ struct sk_buff *pos;
+ __u32 next_fsn = 0;
+ int is_last = 0;
+
+ sin = sctp_stream_in(ulpq->asoc, event->stream);
+
+ skb_queue_walk(&ulpq->reasm, pos) {
+ struct sctp_ulpevent *cevent = sctp_skb2event(pos);
+
+ if (cevent->stream < event->stream)
+ continue;
+
+ if (cevent->stream > event->stream ||
+ cevent->mid != sin->mid)
+ break;
+
+ switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+ case SCTP_DATA_FIRST_FRAG:
+ goto out;
+ case SCTP_DATA_MIDDLE_FRAG:
+ if (!first_frag) {
+ if (cevent->fsn == sin->fsn) {
+ first_frag = pos;
+ last_frag = pos;
+ next_fsn = cevent->fsn + 1;
+ }
+ } else if (cevent->fsn == next_fsn) {
+ last_frag = pos;
+ next_fsn++;
+ } else {
+ goto out;
+ }
+ break;
+ case SCTP_DATA_LAST_FRAG:
+ if (!first_frag) {
+ if (cevent->fsn == sin->fsn) {
+ first_frag = pos;
+ last_frag = pos;
+ next_fsn = 0;
+ is_last = 1;
+ }
+ } else if (cevent->fsn == next_fsn) {
+ last_frag = pos;
+ next_fsn = 0;
+ is_last = 1;
+ }
+ goto out;
+ default:
+ goto out;
+ }
+ }
+
+out:
+ if (!first_frag)
+ return NULL;
+
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm, first_frag,
+ last_frag);
+ if (retval) {
+ sin->fsn = next_fsn;
+ if (is_last) {
+ retval->msg_flags |= MSG_EOR;
+ sin->pd_mode = 0;
+ }
+ }
+
+ return retval;
+}
+
+static struct sctp_ulpevent *sctp_intl_retrieve_reassembled(
+ struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_association *asoc = ulpq->asoc;
+ struct sk_buff *pos, *first_frag = NULL;
+ struct sctp_ulpevent *retval = NULL;
+ struct sk_buff *pd_first = NULL;
+ struct sk_buff *pd_last = NULL;
+ struct sctp_stream_in *sin;
+ __u32 next_fsn = 0;
+ __u32 pd_point = 0;
+ __u32 pd_len = 0;
+ __u32 mid = 0;
+
+ sin = sctp_stream_in(ulpq->asoc, event->stream);
+
+ skb_queue_walk(&ulpq->reasm, pos) {
+ struct sctp_ulpevent *cevent = sctp_skb2event(pos);
+
+ if (cevent->stream < event->stream)
+ continue;
+ if (cevent->stream > event->stream)
+ break;
+
+ if (MID_lt(cevent->mid, event->mid))
+ continue;
+ if (MID_lt(event->mid, cevent->mid))
+ break;
+
+ switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+ case SCTP_DATA_FIRST_FRAG:
+ if (cevent->mid == sin->mid) {
+ pd_first = pos;
+ pd_last = pos;
+ pd_len = pos->len;
+ }
+
+ first_frag = pos;
+ next_fsn = 0;
+ mid = cevent->mid;
+ break;
+
+ case SCTP_DATA_MIDDLE_FRAG:
+ if (first_frag && cevent->mid == mid &&
+ cevent->fsn == next_fsn) {
+ next_fsn++;
+ if (pd_first) {
+ pd_last = pos;
+ pd_len += pos->len;
+ }
+ } else {
+ first_frag = NULL;
+ }
+ break;
+
+ case SCTP_DATA_LAST_FRAG:
+ if (first_frag && cevent->mid == mid &&
+ cevent->fsn == next_fsn)
+ goto found;
+ else
+ first_frag = NULL;
+ break;
+ }
+ }
+
+ if (!pd_first)
+ goto out;
+
+ pd_point = sctp_sk(asoc->base.sk)->pd_point;
+ if (pd_point && pd_point <= pd_len) {
+ retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ &ulpq->reasm,
+ pd_first, pd_last);
+ if (retval) {
+ sin->fsn = next_fsn;
+ sin->pd_mode = 1;
+ }
+ }
+ goto out;
+
+found:
+ retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ &ulpq->reasm,
+ first_frag, pos);
+ if (retval)
+ retval->msg_flags |= MSG_EOR;
+
+out:
+ return retval;
+}
+
+static struct sctp_ulpevent *sctp_intl_reasm(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_ulpevent *retval = NULL;
+ struct sctp_stream_in *sin;
+
+ if (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK)) {
+ event->msg_flags |= MSG_EOR;
+ return event;
+ }
+
+ sctp_intl_store_reasm(ulpq, event);
+
+ sin = sctp_stream_in(ulpq->asoc, event->stream);
+ if (sin->pd_mode && event->mid == sin->mid &&
+ event->fsn == sin->fsn)
+ retval = sctp_intl_retrieve_partial(ulpq, event);
+
+ if (!retval)
+ retval = sctp_intl_retrieve_reassembled(ulpq, event);
+
+ return retval;
+}
+
+static void sctp_intl_store_ordered(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_ulpevent *cevent;
+ struct sk_buff *pos;
+
+ pos = skb_peek_tail(&ulpq->lobby);
+ if (!pos) {
+ __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
+ return;
+ }
+
+ cevent = (struct sctp_ulpevent *)pos->cb;
+ if (event->stream == cevent->stream &&
+ MID_lt(cevent->mid, event->mid)) {
+ __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
+ return;
+ }
+
+ if (event->stream > cevent->stream) {
+ __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
+ return;
+ }
+
+ skb_queue_walk(&ulpq->lobby, pos) {
+ cevent = (struct sctp_ulpevent *)pos->cb;
+
+ if (cevent->stream > event->stream)
+ break;
+
+ if (cevent->stream == event->stream &&
+ MID_lt(event->mid, cevent->mid))
+ break;
+ }
+
+ __skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event));
+}
+
+static void sctp_intl_retrieve_ordered(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sk_buff_head *event_list;
+ struct sctp_stream *stream;
+ struct sk_buff *pos, *tmp;
+ __u16 sid = event->stream;
+
+ stream = &ulpq->asoc->stream;
+ event_list = (struct sk_buff_head *)sctp_event2skb(event)->prev;
+
+ sctp_skb_for_each(pos, &ulpq->lobby, tmp) {
+ struct sctp_ulpevent *cevent = (struct sctp_ulpevent *)pos->cb;
+
+ if (cevent->stream > sid)
+ break;
+
+ if (cevent->stream < sid)
+ continue;
+
+ if (cevent->mid != sctp_mid_peek(stream, in, sid))
+ break;
+
+ sctp_mid_next(stream, in, sid);
+
+ __skb_unlink(pos, &ulpq->lobby);
+
+ __skb_queue_tail(event_list, pos);
+ }
+}
+
+static struct sctp_ulpevent *sctp_intl_order(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_stream *stream;
+ __u16 sid;
+
+ stream = &ulpq->asoc->stream;
+ sid = event->stream;
+
+ if (event->mid != sctp_mid_peek(stream, in, sid)) {
+ sctp_intl_store_ordered(ulpq, event);
+ return NULL;
+ }
+
+ sctp_mid_next(stream, in, sid);
+
+ sctp_intl_retrieve_ordered(ulpq, event);
+
+ return event;
+}
+
+static int sctp_enqueue_event(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sk_buff *skb = sctp_event2skb(event);
+ struct sock *sk = ulpq->asoc->base.sk;
+ struct sctp_sock *sp = sctp_sk(sk);
+ struct sk_buff_head *skb_list;
+
+ skb_list = (struct sk_buff_head *)skb->prev;
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN &&
+ (sk->sk_shutdown & SEND_SHUTDOWN ||
+ !sctp_ulpevent_is_notification(event)))
+ goto out_free;
+
+ if (!sctp_ulpevent_is_notification(event)) {
+ sk_mark_napi_id(sk, skb);
+ sk_incoming_cpu_update(sk);
+ }
+
+ if (!sctp_ulpevent_is_enabled(event, &sp->subscribe))
+ goto out_free;
+
+ if (skb_list)
+ skb_queue_splice_tail_init(skb_list,
+ &sk->sk_receive_queue);
+ else
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+
+ if (!sp->data_ready_signalled) {
+ sp->data_ready_signalled = 1;
+ sk->sk_data_ready(sk);
+ }
+
+ return 1;
+
+out_free:
+ if (skb_list)
+ sctp_queue_purge_ulpevents(skb_list);
+ else
+ sctp_ulpevent_free(event);
+
+ return 0;
+}
+
+static void sctp_intl_store_reasm_uo(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_ulpevent *cevent;
+ struct sk_buff *pos;
+
+ pos = skb_peek_tail(&ulpq->reasm_uo);
+ if (!pos) {
+ __skb_queue_tail(&ulpq->reasm_uo, sctp_event2skb(event));
+ return;
+ }
+
+ cevent = sctp_skb2event(pos);
+
+ if (event->stream == cevent->stream &&
+ event->mid == cevent->mid &&
+ (cevent->msg_flags & SCTP_DATA_FIRST_FRAG ||
+ (!(event->msg_flags & SCTP_DATA_FIRST_FRAG) &&
+ event->fsn > cevent->fsn))) {
+ __skb_queue_tail(&ulpq->reasm_uo, sctp_event2skb(event));
+ return;
+ }
+
+ if ((event->stream == cevent->stream &&
+ MID_lt(cevent->mid, event->mid)) ||
+ event->stream > cevent->stream) {
+ __skb_queue_tail(&ulpq->reasm_uo, sctp_event2skb(event));
+ return;
+ }
+
+ skb_queue_walk(&ulpq->reasm_uo, pos) {
+ cevent = sctp_skb2event(pos);
+
+ if (event->stream < cevent->stream ||
+ (event->stream == cevent->stream &&
+ MID_lt(event->mid, cevent->mid)))
+ break;
+
+ if (event->stream == cevent->stream &&
+ event->mid == cevent->mid &&
+ !(cevent->msg_flags & SCTP_DATA_FIRST_FRAG) &&
+ (event->msg_flags & SCTP_DATA_FIRST_FRAG ||
+ event->fsn < cevent->fsn))
+ break;
+ }
+
+ __skb_queue_before(&ulpq->reasm_uo, pos, sctp_event2skb(event));
+}
+
+static struct sctp_ulpevent *sctp_intl_retrieve_partial_uo(
+ struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sk_buff *first_frag = NULL;
+ struct sk_buff *last_frag = NULL;
+ struct sctp_ulpevent *retval;
+ struct sctp_stream_in *sin;
+ struct sk_buff *pos;
+ __u32 next_fsn = 0;
+ int is_last = 0;
+
+ sin = sctp_stream_in(ulpq->asoc, event->stream);
+
+ skb_queue_walk(&ulpq->reasm_uo, pos) {
+ struct sctp_ulpevent *cevent = sctp_skb2event(pos);
+
+ if (cevent->stream < event->stream)
+ continue;
+ if (cevent->stream > event->stream)
+ break;
+
+ if (MID_lt(cevent->mid, sin->mid_uo))
+ continue;
+ if (MID_lt(sin->mid_uo, cevent->mid))
+ break;
+
+ switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+ case SCTP_DATA_FIRST_FRAG:
+ goto out;
+ case SCTP_DATA_MIDDLE_FRAG:
+ if (!first_frag) {
+ if (cevent->fsn == sin->fsn_uo) {
+ first_frag = pos;
+ last_frag = pos;
+ next_fsn = cevent->fsn + 1;
+ }
+ } else if (cevent->fsn == next_fsn) {
+ last_frag = pos;
+ next_fsn++;
+ } else {
+ goto out;
+ }
+ break;
+ case SCTP_DATA_LAST_FRAG:
+ if (!first_frag) {
+ if (cevent->fsn == sin->fsn_uo) {
+ first_frag = pos;
+ last_frag = pos;
+ next_fsn = 0;
+ is_last = 1;
+ }
+ } else if (cevent->fsn == next_fsn) {
+ last_frag = pos;
+ next_fsn = 0;
+ is_last = 1;
+ }
+ goto out;
+ default:
+ goto out;
+ }
+ }
+
+out:
+ if (!first_frag)
+ return NULL;
+
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm_uo, first_frag,
+ last_frag);
+ if (retval) {
+ sin->fsn_uo = next_fsn;
+ if (is_last) {
+ retval->msg_flags |= MSG_EOR;
+ sin->pd_mode_uo = 0;
+ }
+ }
+
+ return retval;
+}
+
+static struct sctp_ulpevent *sctp_intl_retrieve_reassembled_uo(
+ struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_association *asoc = ulpq->asoc;
+ struct sk_buff *pos, *first_frag = NULL;
+ struct sctp_ulpevent *retval = NULL;
+ struct sk_buff *pd_first = NULL;
+ struct sk_buff *pd_last = NULL;
+ struct sctp_stream_in *sin;
+ __u32 next_fsn = 0;
+ __u32 pd_point = 0;
+ __u32 pd_len = 0;
+ __u32 mid = 0;
+
+ sin = sctp_stream_in(ulpq->asoc, event->stream);
+
+ skb_queue_walk(&ulpq->reasm_uo, pos) {
+ struct sctp_ulpevent *cevent = sctp_skb2event(pos);
+
+ if (cevent->stream < event->stream)
+ continue;
+ if (cevent->stream > event->stream)
+ break;
+
+ if (MID_lt(cevent->mid, event->mid))
+ continue;
+ if (MID_lt(event->mid, cevent->mid))
+ break;
+
+ switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+ case SCTP_DATA_FIRST_FRAG:
+ if (!sin->pd_mode_uo) {
+ sin->mid_uo = cevent->mid;
+ pd_first = pos;
+ pd_last = pos;
+ pd_len = pos->len;
+ }
+
+ first_frag = pos;
+ next_fsn = 0;
+ mid = cevent->mid;
+ break;
+
+ case SCTP_DATA_MIDDLE_FRAG:
+ if (first_frag && cevent->mid == mid &&
+ cevent->fsn == next_fsn) {
+ next_fsn++;
+ if (pd_first) {
+ pd_last = pos;
+ pd_len += pos->len;
+ }
+ } else {
+ first_frag = NULL;
+ }
+ break;
+
+ case SCTP_DATA_LAST_FRAG:
+ if (first_frag && cevent->mid == mid &&
+ cevent->fsn == next_fsn)
+ goto found;
+ else
+ first_frag = NULL;
+ break;
+ }
+ }
+
+ if (!pd_first)
+ goto out;
+
+ pd_point = sctp_sk(asoc->base.sk)->pd_point;
+ if (pd_point && pd_point <= pd_len) {
+ retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ &ulpq->reasm_uo,
+ pd_first, pd_last);
+ if (retval) {
+ sin->fsn_uo = next_fsn;
+ sin->pd_mode_uo = 1;
+ }
+ }
+ goto out;
+
+found:
+ retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ &ulpq->reasm_uo,
+ first_frag, pos);
+ if (retval)
+ retval->msg_flags |= MSG_EOR;
+
+out:
+ return retval;
+}
+
+static struct sctp_ulpevent *sctp_intl_reasm_uo(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sctp_ulpevent *retval = NULL;
+ struct sctp_stream_in *sin;
+
+ if (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK)) {
+ event->msg_flags |= MSG_EOR;
+ return event;
+ }
+
+ sctp_intl_store_reasm_uo(ulpq, event);
+
+ sin = sctp_stream_in(ulpq->asoc, event->stream);
+ if (sin->pd_mode_uo && event->mid == sin->mid_uo &&
+ event->fsn == sin->fsn_uo)
+ retval = sctp_intl_retrieve_partial_uo(ulpq, event);
+
+ if (!retval)
+ retval = sctp_intl_retrieve_reassembled_uo(ulpq, event);
+
+ return retval;
+}
+
+static struct sctp_ulpevent *sctp_intl_retrieve_first_uo(struct sctp_ulpq *ulpq)
+{
+ struct sctp_stream_in *csin, *sin = NULL;
+ struct sk_buff *first_frag = NULL;
+ struct sk_buff *last_frag = NULL;
+ struct sctp_ulpevent *retval;
+ struct sk_buff *pos;
+ __u32 next_fsn = 0;
+ __u16 sid = 0;
+
+ skb_queue_walk(&ulpq->reasm_uo, pos) {
+ struct sctp_ulpevent *cevent = sctp_skb2event(pos);
+
+ csin = sctp_stream_in(ulpq->asoc, cevent->stream);
+ if (csin->pd_mode_uo)
+ continue;
+
+ switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+ case SCTP_DATA_FIRST_FRAG:
+ if (first_frag)
+ goto out;
+ first_frag = pos;
+ last_frag = pos;
+ next_fsn = 0;
+ sin = csin;
+ sid = cevent->stream;
+ sin->mid_uo = cevent->mid;
+ break;
+ case SCTP_DATA_MIDDLE_FRAG:
+ if (!first_frag)
+ break;
+ if (cevent->stream == sid &&
+ cevent->mid == sin->mid_uo &&
+ cevent->fsn == next_fsn) {
+ next_fsn++;
+ last_frag = pos;
+ } else {
+ goto out;
+ }
+ break;
+ case SCTP_DATA_LAST_FRAG:
+ if (first_frag)
+ goto out;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!first_frag)
+ return NULL;
+
+out:
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm_uo, first_frag,
+ last_frag);
+ if (retval) {
+ sin->fsn_uo = next_fsn;
+ sin->pd_mode_uo = 1;
+ }
+
+ return retval;
+}
+
+static int sctp_ulpevent_idata(struct sctp_ulpq *ulpq,
+ struct sctp_chunk *chunk, gfp_t gfp)
+{
+ struct sctp_ulpevent *event;
+ struct sk_buff_head temp;
+ int event_eor = 0;
+
+ event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp);
+ if (!event)
+ return -ENOMEM;
+
+ event->mid = ntohl(chunk->subh.idata_hdr->mid);
+ if (event->msg_flags & SCTP_DATA_FIRST_FRAG)
+ event->ppid = chunk->subh.idata_hdr->ppid;
+ else
+ event->fsn = ntohl(chunk->subh.idata_hdr->fsn);
+
+ if (!(event->msg_flags & SCTP_DATA_UNORDERED)) {
+ event = sctp_intl_reasm(ulpq, event);
+ if (event && event->msg_flags & MSG_EOR) {
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+
+ event = sctp_intl_order(ulpq, event);
+ }
+ } else {
+ event = sctp_intl_reasm_uo(ulpq, event);
+ }
+
+ if (event) {
+ event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0;
+ sctp_enqueue_event(ulpq, event);
+ }
+
+ return event_eor;
+}
+
+static struct sctp_ulpevent *sctp_intl_retrieve_first(struct sctp_ulpq *ulpq)
+{
+ struct sctp_stream_in *csin, *sin = NULL;
+ struct sk_buff *first_frag = NULL;
+ struct sk_buff *last_frag = NULL;
+ struct sctp_ulpevent *retval;
+ struct sk_buff *pos;
+ __u32 next_fsn = 0;
+ __u16 sid = 0;
+
+ skb_queue_walk(&ulpq->reasm, pos) {
+ struct sctp_ulpevent *cevent = sctp_skb2event(pos);
+
+ csin = sctp_stream_in(ulpq->asoc, cevent->stream);
+ if (csin->pd_mode)
+ continue;
+
+ switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
+ case SCTP_DATA_FIRST_FRAG:
+ if (first_frag)
+ goto out;
+ if (cevent->mid == csin->mid) {
+ first_frag = pos;
+ last_frag = pos;
+ next_fsn = 0;
+ sin = csin;
+ sid = cevent->stream;
+ }
+ break;
+ case SCTP_DATA_MIDDLE_FRAG:
+ if (!first_frag)
+ break;
+ if (cevent->stream == sid &&
+ cevent->mid == sin->mid &&
+ cevent->fsn == next_fsn) {
+ next_fsn++;
+ last_frag = pos;
+ } else {
+ goto out;
+ }
+ break;
+ case SCTP_DATA_LAST_FRAG:
+ if (first_frag)
+ goto out;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!first_frag)
+ return NULL;
+
+out:
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm, first_frag,
+ last_frag);
+ if (retval) {
+ sin->fsn = next_fsn;
+ sin->pd_mode = 1;
+ }
+
+ return retval;
+}
+
+static void sctp_intl_start_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
+{
+ struct sctp_ulpevent *event;
+
+ if (!skb_queue_empty(&ulpq->reasm)) {
+ do {
+ event = sctp_intl_retrieve_first(ulpq);
+ if (event)
+ sctp_enqueue_event(ulpq, event);
+ } while (event);
+ }
+
+ if (!skb_queue_empty(&ulpq->reasm_uo)) {
+ do {
+ event = sctp_intl_retrieve_first_uo(ulpq);
+ if (event)
+ sctp_enqueue_event(ulpq, event);
+ } while (event);
+ }
+}
+
+static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
+ gfp_t gfp)
+{
+ struct sctp_association *asoc = ulpq->asoc;
+ __u32 freed = 0;
+ __u16 needed;
+
+ if (chunk) {
+ needed = ntohs(chunk->chunk_hdr->length);
+ needed -= sizeof(struct sctp_idata_chunk);
+ } else {
+ needed = SCTP_DEFAULT_MAXWINDOW;
+ }
+
+ if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
+ freed = sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed);
+ if (freed < needed)
+ freed += sctp_ulpq_renege_list(ulpq, &ulpq->reasm,
+ needed);
+ if (freed < needed)
+ freed += sctp_ulpq_renege_list(ulpq, &ulpq->reasm_uo,
+ needed);
+ }
+
+ if (chunk && freed >= needed)
+ if (sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
+ sctp_intl_start_pd(ulpq, gfp);
+
+ sk_mem_reclaim(asoc->base.sk);
+}
+
+static void sctp_intl_stream_abort_pd(struct sctp_ulpq *ulpq, __u16 sid,
+ __u32 mid, __u16 flags, gfp_t gfp)
+{
+ struct sock *sk = ulpq->asoc->base.sk;
+ struct sctp_ulpevent *ev = NULL;
+
+ if (!sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT,
+ &sctp_sk(sk)->subscribe))
+ return;
+
+ ev = sctp_ulpevent_make_pdapi(ulpq->asoc, SCTP_PARTIAL_DELIVERY_ABORTED,
+ sid, mid, flags, gfp);
+ if (ev) {
+ __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev));
+
+ if (!sctp_sk(sk)->data_ready_signalled) {
+ sctp_sk(sk)->data_ready_signalled = 1;
+ sk->sk_data_ready(sk);
+ }
+ }
+}
+
+static void sctp_intl_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid)
+{
+ struct sctp_stream *stream = &ulpq->asoc->stream;
+ struct sctp_ulpevent *cevent, *event = NULL;
+ struct sk_buff_head *lobby = &ulpq->lobby;
+ struct sk_buff *pos, *tmp;
+ struct sk_buff_head temp;
+ __u16 csid;
+ __u32 cmid;
+
+ skb_queue_head_init(&temp);
+ sctp_skb_for_each(pos, lobby, tmp) {
+ cevent = (struct sctp_ulpevent *)pos->cb;
+ csid = cevent->stream;
+ cmid = cevent->mid;
+
+ if (csid > sid)
+ break;
+
+ if (csid < sid)
+ continue;
+
+ if (!MID_lt(cmid, sctp_mid_peek(stream, in, csid)))
+ break;
+
+ __skb_unlink(pos, lobby);
+ if (!event)
+ event = sctp_skb2event(pos);
+
+ __skb_queue_tail(&temp, pos);
+ }
+
+ if (!event && pos != (struct sk_buff *)lobby) {
+ cevent = (struct sctp_ulpevent *)pos->cb;
+ csid = cevent->stream;
+ cmid = cevent->mid;
+
+ if (csid == sid && cmid == sctp_mid_peek(stream, in, csid)) {
+ sctp_mid_next(stream, in, csid);
+ __skb_unlink(pos, lobby);
+ __skb_queue_tail(&temp, pos);
+ event = sctp_skb2event(pos);
+ }
+ }
+
+ if (event) {
+ sctp_intl_retrieve_ordered(ulpq, event);
+ sctp_enqueue_event(ulpq, event);
+ }
+}
+
+static void sctp_intl_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
+{
+ struct sctp_stream *stream = &ulpq->asoc->stream;
+ __u16 sid;
+
+ for (sid = 0; sid < stream->incnt; sid++) {
+ struct sctp_stream_in *sin = &stream->in[sid];
+ __u32 mid;
+
+ if (sin->pd_mode_uo) {
+ sin->pd_mode_uo = 0;
+
+ mid = sin->mid_uo;
+ sctp_intl_stream_abort_pd(ulpq, sid, mid, 0x1, gfp);
+ }
+
+ if (sin->pd_mode) {
+ sin->pd_mode = 0;
+
+ mid = sin->mid;
+ sctp_intl_stream_abort_pd(ulpq, sid, mid, 0, gfp);
+ sctp_mid_skip(stream, in, sid, mid);
+
+ sctp_intl_reap_ordered(ulpq, sid);
+ }
+ }
+
+ /* intl abort pd happens only when all data needs to be cleaned */
+ sctp_ulpq_flush(ulpq);
+}
+
+static inline int sctp_get_skip_pos(struct sctp_ifwdtsn_skip *skiplist,
+ int nskips, __be16 stream, __u8 flags)
+{
+ int i;
+
+ for (i = 0; i < nskips; i++)
+ if (skiplist[i].stream == stream &&
+ skiplist[i].flags == flags)
+ return i;
+
+ return i;
+}
+
+#define SCTP_FTSN_U_BIT 0x1
+static void sctp_generate_iftsn(struct sctp_outq *q, __u32 ctsn)
+{
+ struct sctp_ifwdtsn_skip ftsn_skip_arr[10];
+ struct sctp_association *asoc = q->asoc;
+ struct sctp_chunk *ftsn_chunk = NULL;
+ struct list_head *lchunk, *temp;
+ int nskips = 0, skip_pos;
+ struct sctp_chunk *chunk;
+ __u32 tsn;
+
+ if (!asoc->peer.prsctp_capable)
+ return;
+
+ if (TSN_lt(asoc->adv_peer_ack_point, ctsn))
+ asoc->adv_peer_ack_point = ctsn;
+
+ list_for_each_safe(lchunk, temp, &q->abandoned) {
+ chunk = list_entry(lchunk, struct sctp_chunk, transmitted_list);
+ tsn = ntohl(chunk->subh.data_hdr->tsn);
+
+ if (TSN_lte(tsn, ctsn)) {
+ list_del_init(lchunk);
+ sctp_chunk_free(chunk);
+ } else if (TSN_lte(tsn, asoc->adv_peer_ack_point + 1)) {
+ __be16 sid = chunk->subh.idata_hdr->stream;
+ __be32 mid = chunk->subh.idata_hdr->mid;
+ __u8 flags = 0;
+
+ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+ flags |= SCTP_FTSN_U_BIT;
+
+ asoc->adv_peer_ack_point = tsn;
+ skip_pos = sctp_get_skip_pos(&ftsn_skip_arr[0], nskips,
+ sid, flags);
+ ftsn_skip_arr[skip_pos].stream = sid;
+ ftsn_skip_arr[skip_pos].reserved = 0;
+ ftsn_skip_arr[skip_pos].flags = flags;
+ ftsn_skip_arr[skip_pos].mid = mid;
+ if (skip_pos == nskips)
+ nskips++;
+ if (nskips == 10)
+ break;
+ } else {
+ break;
+ }
+ }
+
+ if (asoc->adv_peer_ack_point > ctsn)
+ ftsn_chunk = sctp_make_ifwdtsn(asoc, asoc->adv_peer_ack_point,
+ nskips, &ftsn_skip_arr[0]);
+
+ if (ftsn_chunk) {
+ list_add_tail(&ftsn_chunk->list, &q->control_chunk_list);
+ SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS);
+ }
+}
+
+#define _sctp_walk_ifwdtsn(pos, chunk, end) \
+ for (pos = chunk->subh.ifwdtsn_hdr->skip; \
+ (void *)pos < (void *)chunk->subh.ifwdtsn_hdr->skip + (end); pos++)
+
+#define sctp_walk_ifwdtsn(pos, ch) \
+ _sctp_walk_ifwdtsn((pos), (ch), ntohs((ch)->chunk_hdr->length) - \
+ sizeof(struct sctp_ifwdtsn_chunk))
+
+static bool sctp_validate_fwdtsn(struct sctp_chunk *chunk)
+{
+ struct sctp_fwdtsn_skip *skip;
+ __u16 incnt;
+
+ if (chunk->chunk_hdr->type != SCTP_CID_FWD_TSN)
+ return false;
+
+ incnt = chunk->asoc->stream.incnt;
+ sctp_walk_fwdtsn(skip, chunk)
+ if (ntohs(skip->stream) >= incnt)
+ return false;
+
+ return true;
+}
+
+static bool sctp_validate_iftsn(struct sctp_chunk *chunk)
+{
+ struct sctp_ifwdtsn_skip *skip;
+ __u16 incnt;
+
+ if (chunk->chunk_hdr->type != SCTP_CID_I_FWD_TSN)
+ return false;
+
+ incnt = chunk->asoc->stream.incnt;
+ sctp_walk_ifwdtsn(skip, chunk)
+ if (ntohs(skip->stream) >= incnt)
+ return false;
+
+ return true;
+}
+
+static void sctp_report_fwdtsn(struct sctp_ulpq *ulpq, __u32 ftsn)
+{
+ /* Move the Cumulattive TSN Ack ahead. */
+ sctp_tsnmap_skip(&ulpq->asoc->peer.tsn_map, ftsn);
+ /* purge the fragmentation queue */
+ sctp_ulpq_reasm_flushtsn(ulpq, ftsn);
+ /* Abort any in progress partial delivery. */
+ sctp_ulpq_abort_pd(ulpq, GFP_ATOMIC);
+}
+
+static void sctp_intl_reasm_flushtsn(struct sctp_ulpq *ulpq, __u32 ftsn)
+{
+ struct sk_buff *pos, *tmp;
+
+ skb_queue_walk_safe(&ulpq->reasm, pos, tmp) {
+ struct sctp_ulpevent *event = sctp_skb2event(pos);
+ __u32 tsn = event->tsn;
+
+ if (TSN_lte(tsn, ftsn)) {
+ __skb_unlink(pos, &ulpq->reasm);
+ sctp_ulpevent_free(event);
+ }
+ }
+
+ skb_queue_walk_safe(&ulpq->reasm_uo, pos, tmp) {
+ struct sctp_ulpevent *event = sctp_skb2event(pos);
+ __u32 tsn = event->tsn;
+
+ if (TSN_lte(tsn, ftsn)) {
+ __skb_unlink(pos, &ulpq->reasm_uo);
+ sctp_ulpevent_free(event);
+ }
+ }
+}
+
+static void sctp_report_iftsn(struct sctp_ulpq *ulpq, __u32 ftsn)
+{
+ /* Move the Cumulattive TSN Ack ahead. */
+ sctp_tsnmap_skip(&ulpq->asoc->peer.tsn_map, ftsn);
+ /* purge the fragmentation queue */
+ sctp_intl_reasm_flushtsn(ulpq, ftsn);
+ /* abort only when it's for all data */
+ if (ftsn == sctp_tsnmap_get_max_tsn_seen(&ulpq->asoc->peer.tsn_map))
+ sctp_intl_abort_pd(ulpq, GFP_ATOMIC);
+}
+
+static void sctp_handle_fwdtsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk)
+{
+ struct sctp_fwdtsn_skip *skip;
+
+ /* Walk through all the skipped SSNs */
+ sctp_walk_fwdtsn(skip, chunk)
+ sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
+}
+
+static void sctp_intl_skip(struct sctp_ulpq *ulpq, __u16 sid, __u32 mid,
+ __u8 flags)
+{
+ struct sctp_stream_in *sin = sctp_stream_in(ulpq->asoc, sid);
+ struct sctp_stream *stream = &ulpq->asoc->stream;
+
+ if (flags & SCTP_FTSN_U_BIT) {
+ if (sin->pd_mode_uo && MID_lt(sin->mid_uo, mid)) {
+ sin->pd_mode_uo = 0;
+ sctp_intl_stream_abort_pd(ulpq, sid, mid, 0x1,
+ GFP_ATOMIC);
+ }
+ return;
+ }
+
+ if (MID_lt(mid, sctp_mid_peek(stream, in, sid)))
+ return;
+
+ if (sin->pd_mode) {
+ sin->pd_mode = 0;
+ sctp_intl_stream_abort_pd(ulpq, sid, mid, 0x0, GFP_ATOMIC);
+ }
+
+ sctp_mid_skip(stream, in, sid, mid);
+
+ sctp_intl_reap_ordered(ulpq, sid);
+}
+
+static void sctp_handle_iftsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk)
+{
+ struct sctp_ifwdtsn_skip *skip;
+
+ /* Walk through all the skipped MIDs and abort stream pd if possible */
+ sctp_walk_ifwdtsn(skip, chunk)
+ sctp_intl_skip(ulpq, ntohs(skip->stream),
+ ntohl(skip->mid), skip->flags);
+}
+
+static struct sctp_stream_interleave sctp_stream_interleave_0 = {
+ .data_chunk_len = sizeof(struct sctp_data_chunk),
+ .ftsn_chunk_len = sizeof(struct sctp_fwdtsn_chunk),
+ /* DATA process functions */
+ .make_datafrag = sctp_make_datafrag_empty,
+ .assign_number = sctp_chunk_assign_ssn,
+ .validate_data = sctp_validate_data,
+ .ulpevent_data = sctp_ulpq_tail_data,
+ .enqueue_event = sctp_ulpq_tail_event,
+ .renege_events = sctp_ulpq_renege,
+ .start_pd = sctp_ulpq_partial_delivery,
+ .abort_pd = sctp_ulpq_abort_pd,
+ /* FORWARD-TSN process functions */
+ .generate_ftsn = sctp_generate_fwdtsn,
+ .validate_ftsn = sctp_validate_fwdtsn,
+ .report_ftsn = sctp_report_fwdtsn,
+ .handle_ftsn = sctp_handle_fwdtsn,
+};
+
+static struct sctp_stream_interleave sctp_stream_interleave_1 = {
+ .data_chunk_len = sizeof(struct sctp_idata_chunk),
+ .ftsn_chunk_len = sizeof(struct sctp_ifwdtsn_chunk),
+ /* I-DATA process functions */
+ .make_datafrag = sctp_make_idatafrag_empty,
+ .assign_number = sctp_chunk_assign_mid,
+ .validate_data = sctp_validate_idata,
+ .ulpevent_data = sctp_ulpevent_idata,
+ .enqueue_event = sctp_enqueue_event,
+ .renege_events = sctp_renege_events,
+ .start_pd = sctp_intl_start_pd,
+ .abort_pd = sctp_intl_abort_pd,
+ /* I-FORWARD-TSN process functions */
+ .generate_ftsn = sctp_generate_iftsn,
+ .validate_ftsn = sctp_validate_iftsn,
+ .report_ftsn = sctp_report_iftsn,
+ .handle_ftsn = sctp_handle_iftsn,
+};
+
+void sctp_stream_interleave_init(struct sctp_stream *stream)
+{
+ struct sctp_association *asoc;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ stream->si = asoc->intl_enable ? &sctp_stream_interleave_1
+ : &sctp_stream_interleave_0;
+}
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 0b83ec51e43b..f5fcd425232a 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -119,16 +119,27 @@ static struct sctp_sched_ops sctp_sched_fcfs = {
.unsched_all = sctp_sched_fcfs_unsched_all,
};
+static void sctp_sched_ops_fcfs_init(void)
+{
+ sctp_sched_ops_register(SCTP_SS_FCFS, &sctp_sched_fcfs);
+}
+
/* API to other parts of the stack */
-extern struct sctp_sched_ops sctp_sched_prio;
-extern struct sctp_sched_ops sctp_sched_rr;
+static struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1];
-static struct sctp_sched_ops *sctp_sched_ops[] = {
- &sctp_sched_fcfs,
- &sctp_sched_prio,
- &sctp_sched_rr,
-};
+void sctp_sched_ops_register(enum sctp_sched_type sched,
+ struct sctp_sched_ops *sched_ops)
+{
+ sctp_sched_ops[sched] = sched_ops;
+}
+
+void sctp_sched_ops_init(void)
+{
+ sctp_sched_ops_fcfs_init();
+ sctp_sched_ops_prio_init();
+ sctp_sched_ops_rr_init();
+}
int sctp_sched_set_sched(struct sctp_association *asoc,
enum sctp_sched_type sched)
@@ -231,7 +242,8 @@ int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch)
{
- if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) {
+ if (!list_is_last(&ch->frag_list, &ch->msg->chunks) &&
+ !q->asoc->intl_enable) {
struct sctp_stream_out *sout;
__u16 sid;
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
index 384dbf3c8760..7997d35dd0fd 100644
--- a/net/sctp/stream_sched_prio.c
+++ b/net/sctp/stream_sched_prio.c
@@ -333,7 +333,7 @@ static void sctp_sched_prio_unsched_all(struct sctp_stream *stream)
sctp_sched_prio_unsched(soute);
}
-struct sctp_sched_ops sctp_sched_prio = {
+static struct sctp_sched_ops sctp_sched_prio = {
.set = sctp_sched_prio_set,
.get = sctp_sched_prio_get,
.init = sctp_sched_prio_init,
@@ -345,3 +345,8 @@ struct sctp_sched_ops sctp_sched_prio = {
.sched_all = sctp_sched_prio_sched_all,
.unsched_all = sctp_sched_prio_unsched_all,
};
+
+void sctp_sched_ops_prio_init(void)
+{
+ sctp_sched_ops_register(SCTP_SS_PRIO, &sctp_sched_prio);
+}
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
index 7612a438c5b9..1155692448f1 100644
--- a/net/sctp/stream_sched_rr.c
+++ b/net/sctp/stream_sched_rr.c
@@ -187,7 +187,7 @@ static void sctp_sched_rr_unsched_all(struct sctp_stream *stream)
sctp_sched_rr_unsched(stream, soute);
}
-struct sctp_sched_ops sctp_sched_rr = {
+static struct sctp_sched_ops sctp_sched_rr = {
.set = sctp_sched_rr_set,
.get = sctp_sched_rr_get,
.init = sctp_sched_rr_init,
@@ -199,3 +199,8 @@ struct sctp_sched_ops sctp_sched_rr = {
.sched_all = sctp_sched_rr_sched_all,
.unsched_all = sctp_sched_rr_unsched_all,
};
+
+void sctp_sched_ops_rr_init(void)
+{
+ sctp_sched_ops_register(SCTP_SS_RR, &sctp_sched_rr);
+}
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index ef7ca44d6e6a..33ca5b73cdb3 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -289,6 +289,13 @@ static struct ctl_table sctp_net_table[] = {
.proc_handler = proc_sctp_do_auth,
},
{
+ .procname = "intl_enable",
+ .data = &init_net.sctp.intl_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "addr_scope_policy",
.data = &init_net.sctp.scope_policy,
.maxlen = sizeof(int),
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1e5a22430cf5..47f82bd794d9 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
{
struct dst_entry *dst = sctp_transport_dst_check(t);
+ bool change = true;
if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
- pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
- __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
- /* Use default minimum segment size and disable
- * pmtu discovery on this transport.
- */
- t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
- } else {
- t->pathmtu = pmtu;
+ pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
+ __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
+ /* Use default minimum segment instead */
+ pmtu = SCTP_DEFAULT_MINSEGMENT;
}
+ pmtu = SCTP_TRUNC4(pmtu);
if (dst) {
dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
dst = sctp_transport_dst_check(t);
}
- if (!dst)
+ if (!dst) {
t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
+ dst = t->dst;
+ }
+
+ if (dst) {
+ /* Re-fetch, as under layers may have a higher minimum size */
+ pmtu = SCTP_TRUNC4(dst_mtu(dst));
+ change = t->pathmtu != pmtu;
+ }
+ t->pathmtu = pmtu;
+
+ return change;
}
/* Caches the dst entry and source address for a transport's destination
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 5447228bf1a0..84207ad33e8e 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -443,8 +443,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
goto fail;
/* Pull off the common chunk header and DATA header. */
- skb_pull(skb, sizeof(struct sctp_data_chunk));
- len -= sizeof(struct sctp_data_chunk);
+ skb_pull(skb, sctp_datachk_len(&asoc->stream));
+ len -= sctp_datachk_len(&asoc->stream);
/* Embed the event fields inside the cloned skb. */
event = sctp_skb2event(skb);
@@ -705,8 +705,6 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
sctp_ulpevent_receive_data(event, asoc);
event->stream = ntohs(chunk->subh.data_hdr->stream);
- event->ssn = ntohs(chunk->subh.data_hdr->ssn);
- event->ppid = chunk->subh.data_hdr->ppid;
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) {
event->flags |= SCTP_UNORDERED;
event->cumtsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
@@ -732,8 +730,9 @@ fail:
* various events.
*/
struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
- const struct sctp_association *asoc, __u32 indication,
- gfp_t gfp)
+ const struct sctp_association *asoc,
+ __u32 indication, __u32 sid, __u32 seq,
+ __u32 flags, gfp_t gfp)
{
struct sctp_ulpevent *event;
struct sctp_pdapi_event *pd;
@@ -754,7 +753,9 @@ struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
* Currently unused.
*/
pd->pdapi_type = SCTP_PARTIAL_DELIVERY_EVENT;
- pd->pdapi_flags = 0;
+ pd->pdapi_flags = flags;
+ pd->pdapi_stream = sid;
+ pd->pdapi_seq = seq;
/* pdapi_length: 32 bits (unsigned integer)
*
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index a71be33f3afe..0b427100b0d4 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -60,6 +60,7 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq,
ulpq->asoc = asoc;
skb_queue_head_init(&ulpq->reasm);
+ skb_queue_head_init(&ulpq->reasm_uo);
skb_queue_head_init(&ulpq->lobby);
ulpq->pd_mode = 0;
@@ -83,6 +84,10 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq)
sctp_ulpevent_free(event);
}
+ while ((skb = __skb_dequeue(&ulpq->reasm_uo)) != NULL) {
+ event = sctp_skb2event(skb);
+ sctp_ulpevent_free(event);
+ }
}
/* Dispose of a ulpqueue. */
@@ -104,6 +109,9 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
if (!event)
return -ENOMEM;
+ event->ssn = ntohs(chunk->subh.data_hdr->ssn);
+ event->ppid = chunk->subh.data_hdr->ppid;
+
/* Do reassembly if needed. */
event = sctp_ulpq_reasm(ulpq, event);
@@ -328,9 +336,10 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
* payload was fragmented on the way and ip had to reassemble them.
* We add the rest of skb's to the first skb's fraglist.
*/
-static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net,
- struct sk_buff_head *queue, struct sk_buff *f_frag,
- struct sk_buff *l_frag)
+struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net,
+ struct sk_buff_head *queue,
+ struct sk_buff *f_frag,
+ struct sk_buff *l_frag)
{
struct sk_buff *pos;
struct sk_buff *new = NULL;
@@ -853,7 +862,7 @@ static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq,
struct sctp_stream *stream;
/* Check if this message needs ordering. */
- if (SCTP_DATA_UNORDERED & event->msg_flags)
+ if (event->msg_flags & SCTP_DATA_UNORDERED)
return event;
/* Note: The stream ID must be verified before this routine. */
@@ -974,8 +983,8 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn)
sctp_ulpq_reap_ordered(ulpq, sid);
}
-static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq,
- struct sk_buff_head *list, __u16 needed)
+__u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, struct sk_buff_head *list,
+ __u16 needed)
{
__u16 freed = 0;
__u32 tsn, last_tsn;
@@ -1084,29 +1093,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
gfp_t gfp)
{
- struct sctp_association *asoc;
- __u16 needed, freed;
+ struct sctp_association *asoc = ulpq->asoc;
+ __u32 freed = 0;
+ __u16 needed;
- asoc = ulpq->asoc;
-
- if (chunk) {
- needed = ntohs(chunk->chunk_hdr->length);
- needed -= sizeof(struct sctp_data_chunk);
- } else
- needed = SCTP_DEFAULT_MAXWINDOW;
-
- freed = 0;
+ needed = ntohs(chunk->chunk_hdr->length) -
+ sizeof(struct sctp_data_chunk);
if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
freed = sctp_ulpq_renege_order(ulpq, needed);
- if (freed < needed) {
+ if (freed < needed)
freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
- }
}
/* If able to free enough room, accept this chunk. */
- if (chunk && (freed >= needed)) {
- int retval;
- retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
+ if (freed >= needed) {
+ int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
/*
* Enter partial delivery if chunk has not been
* delivered; otherwise, drain the reassembly queue.
@@ -1140,7 +1141,7 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
&sctp_sk(sk)->subscribe))
ev = sctp_ulpevent_make_pdapi(ulpq->asoc,
SCTP_PARTIAL_DELIVERY_ABORTED,
- gfp);
+ 0, 0, 0, gfp);
if (ev)
__skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev));
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6451c5013e06..3583c8ab1bae 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -115,7 +115,6 @@ static int smc_release(struct socket *sock)
goto out;
smc = smc_sk(sk);
- sock_hold(sk);
if (sk->sk_state == SMC_LISTEN)
/* smc_close_non_accepted() is called and acquires
* sock lock for child sockets again
@@ -124,10 +123,7 @@ static int smc_release(struct socket *sock)
else
lock_sock(sk);
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- sk->sk_state_change(sk);
- } else {
+ if (!smc->use_fallback) {
rc = smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -136,20 +132,21 @@ static int smc_release(struct socket *sock)
sock_release(smc->clcsock);
smc->clcsock = NULL;
}
+ if (smc->use_fallback) {
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ sk->sk_state_change(sk);
+ }
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
- if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
+ if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
out:
return rc;
}
@@ -181,7 +178,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
spin_lock_init(&smc->accept_q_lock);
- INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
@@ -377,6 +373,15 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
+static void smc_lgr_forget(struct smc_link_group *lgr)
+{
+ spin_lock_bh(&smc_lgr_list.lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list);
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
@@ -390,6 +395,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
int rc = 0;
u8 ibport;
+ sock_hold(&smc->sk); /* sock put in passive closing */
+
if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
/* peer has not signalled SMC-capability */
smc->use_fallback = true;
@@ -513,6 +520,8 @@ out_connected:
return rc ? rc : local_contact;
decline_rdma_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
decline_rdma:
@@ -520,15 +529,19 @@ decline_rdma:
smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
rc = smc_clc_send_decline(smc, reason_code);
- if (rc < sizeof(struct smc_clc_msg_decline))
+ if (rc < 0)
goto out_err;
}
goto out_connected;
out_err_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
out_err:
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return rc;
}
@@ -581,40 +594,33 @@ out_err:
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
- struct sock *sk = &lsmc->sk;
- struct socket *new_clcsock;
+ struct socket *new_clcsock = NULL;
+ struct sock *lsk = &lsmc->sk;
struct sock *new_sk;
int rc;
- release_sock(&lsmc->sk);
- new_sk = smc_sock_alloc(sock_net(sk), NULL);
+ release_sock(lsk);
+ new_sk = smc_sock_alloc(sock_net(lsk), NULL);
if (!new_sk) {
rc = -ENOMEM;
- lsmc->sk.sk_err = ENOMEM;
+ lsk->sk_err = ENOMEM;
*new_smc = NULL;
- lock_sock(&lsmc->sk);
+ lock_sock(lsk);
goto out;
}
*new_smc = smc_sk(new_sk);
rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
- lock_sock(&lsmc->sk);
- if (rc < 0) {
- lsmc->sk.sk_err = -rc;
- new_sk->sk_state = SMC_CLOSED;
- sock_set_flag(new_sk, SOCK_DEAD);
- sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
- *new_smc = NULL;
- goto out;
- }
- if (lsmc->sk.sk_state == SMC_CLOSED) {
+ lock_sock(lsk);
+ if (rc < 0)
+ lsk->sk_err = -rc;
+ if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
if (new_clcsock)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
sock_set_flag(new_sk, SOCK_DEAD);
- sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ new_sk->sk_prot->unhash(new_sk);
+ sock_put(new_sk); /* final */
*new_smc = NULL;
goto out;
}
@@ -631,7 +637,7 @@ static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
struct smc_sock *par = smc_sk(parent);
- sock_hold(sk);
+ sock_hold(sk); /* sock_put in smc_accept_unlink () */
spin_lock(&par->accept_q_lock);
list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
spin_unlock(&par->accept_q_lock);
@@ -647,7 +653,7 @@ static void smc_accept_unlink(struct sock *sk)
list_del_init(&smc_sk(sk)->accept_q);
spin_unlock(&par->accept_q_lock);
sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
- sock_put(sk);
+ sock_put(sk); /* sock_hold in smc_accept_enqueue */
}
/* remove a sock from the accept queue to bind it to a new socket created
@@ -664,8 +670,12 @@ struct sock *smc_accept_dequeue(struct sock *parent,
smc_accept_unlink(new_sk);
if (new_sk->sk_state == SMC_CLOSED) {
+ if (isk->clcsock) {
+ sock_release(isk->clcsock);
+ isk->clcsock = NULL;
+ }
new_sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ sock_put(new_sk); /* final */
continue;
}
if (new_sock)
@@ -680,14 +690,11 @@ void smc_close_non_accepted(struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
- sock_hold(sk);
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- } else {
+ if (!smc->use_fallback) {
smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -700,14 +707,15 @@ void smc_close_non_accepted(struct sock *sk)
sock_release(tcp);
}
if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ } else {
+ if (sk->sk_state == SMC_CLOSED)
+ smc_conn_free(&smc->conn);
}
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
}
static int smc_serv_conf_first_link(struct smc_sock *smc)
@@ -751,14 +759,16 @@ static void smc_listen_work(struct work_struct *work)
{
struct smc_sock *new_smc = container_of(work, struct smc_sock,
smc_listen_work);
+ struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct socket *newclcsock = new_smc->clcsock;
struct smc_sock *lsmc = new_smc->listen_smc;
struct smc_clc_msg_accept_confirm cclc;
int local_contact = SMC_REUSE_CONTACT;
struct sock *newsmcsk = &new_smc->sk;
- struct smc_clc_msg_proposal pclc;
+ struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
struct sockaddr_in peeraddr;
+ u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
int rc = 0, len;
@@ -775,7 +785,7 @@ static void smc_listen_work(struct work_struct *work)
/* do inband token exchange -
*wait for and receive SMC Proposal CLC message
*/
- reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
+ reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf),
SMC_CLC_PROPOSAL);
if (reason_code < 0)
goto out_err;
@@ -804,8 +814,11 @@ static void smc_listen_work(struct work_struct *work)
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
- if ((pclc.outgoing_subnet != subnet) ||
- (pclc.prefix_len != prefix_len)) {
+
+ pclc = (struct smc_clc_msg_proposal *)&buf;
+ pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ if (pclc_prfx->outgoing_subnet != subnet ||
+ pclc_prfx->prefix_len != prefix_len) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
@@ -816,7 +829,7 @@ static void smc_listen_work(struct work_struct *work)
/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- smcibdev, ibport, &pclc.lcl, 0);
+ smcibdev, ibport, &pclc->lcl, 0);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -879,11 +892,9 @@ static void smc_listen_work(struct work_struct *work)
}
/* QP confirmation over RoCE fabric */
reason_code = smc_serv_conf_first_link(new_smc);
- if (reason_code < 0) {
+ if (reason_code < 0)
/* peer is not aware of a problem */
- rc = reason_code;
goto out_err_unlock;
- }
if (reason_code > 0)
goto decline_rdma_unlock;
}
@@ -910,21 +921,26 @@ enqueue:
return;
decline_rdma_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
decline_rdma:
/* RDMA setup failed, switch back to TCP */
smc_conn_free(&new_smc->conn);
new_smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
- rc = smc_clc_send_decline(new_smc, reason_code);
- if (rc < sizeof(struct smc_clc_msg_decline))
+ if (smc_clc_send_decline(new_smc, reason_code) < 0)
goto out_err;
}
goto out_connected;
out_err_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
out_err:
+ if (newsmcsk->sk_state == SMC_INIT)
+ sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
smc_conn_free(&new_smc->conn);
goto enqueue; /* queue new sock with sk_err set */
@@ -934,11 +950,12 @@ static void smc_tcp_listen_work(struct work_struct *work)
{
struct smc_sock *lsmc = container_of(work, struct smc_sock,
tcp_listen_work);
+ struct sock *lsk = &lsmc->sk;
struct smc_sock *new_smc;
int rc = 0;
- lock_sock(&lsmc->sk);
- while (lsmc->sk.sk_state == SMC_LISTEN) {
+ lock_sock(lsk);
+ while (lsk->sk_state == SMC_LISTEN) {
rc = smc_clcsock_accept(lsmc, &new_smc);
if (rc)
goto out;
@@ -947,15 +964,25 @@ static void smc_tcp_listen_work(struct work_struct *work)
new_smc->listen_smc = lsmc;
new_smc->use_fallback = false; /* assume rdma capability first*/
- sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
+ sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- schedule_work(&new_smc->smc_listen_work);
+ sock_hold(&new_smc->sk); /* sock_put in passive closing */
+ if (!schedule_work(&new_smc->smc_listen_work))
+ sock_put(&new_smc->sk);
}
out:
- release_sock(&lsmc->sk);
- lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
+ if (lsmc->clcsock) {
+ sock_release(lsmc->clcsock);
+ lsmc->clcsock = NULL;
+ }
+ release_sock(lsk);
+ /* no more listening, wake up smc_close_wait_listen_clcsock and
+ * accept
+ */
+ lsk->sk_state_change(lsk);
+ sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
static int smc_listen(struct socket *sock, int backlog)
@@ -989,7 +1016,9 @@ static int smc_listen(struct socket *sock, int backlog)
sk->sk_ack_backlog = 0;
sk->sk_state = SMC_LISTEN;
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
- schedule_work(&smc->tcp_listen_work);
+ sock_hold(sk); /* sock_hold in tcp_listen_worker */
+ if (!schedule_work(&smc->tcp_listen_work))
+ sock_put(sk);
out:
release_sock(sk);
@@ -1006,6 +1035,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
int rc = 0;
lsmc = smc_sk(sk);
+ sock_hold(sk); /* sock_put below */
lock_sock(sk);
if (lsmc->sk.sk_state != SMC_LISTEN) {
@@ -1040,6 +1070,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
out:
release_sock(sk);
+ sock_put(sk); /* sock_hold above */
return rc;
}
@@ -1107,36 +1138,36 @@ out:
return rc;
}
-static unsigned int smc_accept_poll(struct sock *parent)
+static __poll_t smc_accept_poll(struct sock *parent)
{
- struct smc_sock *isk;
- struct sock *sk;
+ struct smc_sock *isk = smc_sk(parent);
+ int mask = 0;
- lock_sock(parent);
- list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
- sk = (struct sock *)isk;
+ spin_lock(&isk->accept_q_lock);
+ if (!list_empty(&isk->accept_q))
+ mask = POLLIN | POLLRDNORM;
+ spin_unlock(&isk->accept_q_lock);
- if (sk->sk_state == SMC_ACTIVE) {
- release_sock(parent);
- return POLLIN | POLLRDNORM;
- }
- }
- release_sock(parent);
-
- return 0;
+ return mask;
}
-static unsigned int smc_poll(struct file *file, struct socket *sock,
+static __poll_t smc_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask = 0;
+ __poll_t mask = 0;
struct smc_sock *smc;
int rc;
+ if (!sk)
+ return POLLNVAL;
+
smc = smc_sk(sock->sk);
+ sock_hold(sk);
+ lock_sock(sk);
if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
/* delegate to CLC child sock */
+ release_sock(sk);
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
/* if non-blocking connect finished ... */
lock_sock(sk);
@@ -1148,37 +1179,43 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
rc = smc_connect_rdma(smc);
if (rc < 0)
mask |= POLLERR;
- else
- /* success cases including fallback */
- mask |= POLLOUT | POLLWRNORM;
+ /* success cases including fallback */
+ mask |= POLLOUT | POLLWRNORM;
}
}
- release_sock(sk);
} else {
- sock_poll_wait(file, sk_sleep(sk), wait);
- if (sk->sk_state == SMC_LISTEN)
- /* woken up by sk_data_ready in smc_listen_work() */
- mask |= smc_accept_poll(sk);
+ if (sk->sk_state != SMC_CLOSED) {
+ release_sock(sk);
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ lock_sock(sk);
+ }
if (sk->sk_err)
mask |= POLLERR;
- if (atomic_read(&smc->conn.sndbuf_space) ||
- (sk->sk_shutdown & SEND_SHUTDOWN)) {
- mask |= POLLOUT | POLLWRNORM;
- } else {
- sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- }
- if (atomic_read(&smc->conn.bytes_to_rcv))
- mask |= POLLIN | POLLRDNORM;
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
mask |= POLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM | POLLRDHUP;
- if (sk->sk_state == SMC_APPCLOSEWAIT1)
- mask |= POLLIN;
+ if (sk->sk_state == SMC_LISTEN) {
+ /* woken up by sk_data_ready in smc_listen_work() */
+ mask = smc_accept_poll(sk);
+ } else {
+ if (atomic_read(&smc->conn.sndbuf_space) ||
+ sk->sk_shutdown & SEND_SHUTDOWN) {
+ mask |= POLLOUT | POLLWRNORM;
+ } else {
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ }
+ if (atomic_read(&smc->conn.bytes_to_rcv))
+ mask |= POLLIN | POLLRDNORM;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ if (sk->sk_state == SMC_APPCLOSEWAIT1)
+ mask |= POLLIN;
+ }
}
+ release_sock(sk);
+ sock_put(sk);
return mask;
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 0bee9d16cf29..9518986c97b1 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -178,7 +178,6 @@ struct smc_sock { /* smc sock container */
struct work_struct smc_listen_work;/* prepare new accept socket */
struct list_head accept_q; /* sockets to be accepted */
spinlock_t accept_q_lock; /* protects accept_q */
- struct delayed_work sock_put_work; /* final socket freeing */
bool use_fallback; /* fallback to tcp */
u8 wait_close_tx_prepared : 1;
/* shutdown wr or close
@@ -253,12 +252,12 @@ static inline int smc_uncompress_bufsize(u8 compressed)
static inline bool using_ipsec(struct smc_sock *smc)
{
return (smc->clcsock->sk->sk_policy[0] ||
- smc->clcsock->sk->sk_policy[1]) ? 1 : 0;
+ smc->clcsock->sk->sk_policy[1]) ? true : false;
}
#else
static inline bool using_ipsec(struct smc_sock *smc)
{
- return 0;
+ return false;
}
#endif
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 87f7bede6eab..3cd086e5bd28 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -57,9 +57,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
cdcpend->conn);
}
smc_tx_sndbuf_nonfull(smc);
- if (smc->sk.sk_state != SMC_ACTIVE)
- /* wake up smc_close_wait_tx_pends() */
- smc->sk.sk_state_change(&smc->sk);
bh_unlock_sock(&smc->sk);
}
@@ -68,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_cdc_tx_pend **pend)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+ int rc;
- return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
- (struct smc_wr_tx_pend_priv **)pend);
+ rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
+ (struct smc_wr_tx_pend_priv **)pend);
+ if (!conn->alert_token_local)
+ /* abnormal termination */
+ rc = -EPIPE;
+ return rc;
}
static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
@@ -155,14 +157,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
(unsigned long)conn);
}
-bool smc_cdc_tx_has_pending(struct smc_connection *conn)
-{
- struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
-
- return smc_wr_tx_has_pending(link, SMC_CDC_MSG_TYPE,
- smc_cdc_tx_filter, (unsigned long)conn);
-}
-
/********************************* receive ***********************************/
static inline bool smc_cdc_before(u16 seq1, u16 seq2)
@@ -213,6 +207,17 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
/* guarantee 0 <= bytes_to_rcv <= rmbe_size */
smp_mb__after_atomic();
smc->sk.sk_data_ready(&smc->sk);
+ } else if ((conn->local_rx_ctrl.prod_flags.write_blocked) ||
+ (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) {
+ smc->sk.sk_data_ready(&smc->sk);
+ }
+
+ /* piggy backed tx info */
+ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
+ if (diff_cons && smc_tx_prepared_sends(conn)) {
+ smc_tx_sndbuf_nonempty(conn);
+ /* trigger socket release if connection closed */
+ smc_close_wake_tx_prepared(smc);
}
if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
@@ -224,25 +229,10 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
if (smc->clcsock && smc->clcsock->sk)
smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(&smc->sk, SOCK_DONE);
- schedule_work(&conn->close_work);
+ sock_hold(&smc->sk); /* sock_put in close_work */
+ if (!schedule_work(&conn->close_work))
+ sock_put(&smc->sk);
}
-
- /* piggy backed tx info */
- /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
- if (diff_cons && smc_tx_prepared_sends(conn)) {
- smc_tx_sndbuf_nonempty(conn);
- /* trigger socket release if connection closed */
- smc_close_wake_tx_prepared(smc);
- }
-
- /* socket connected but not accepted */
- if (!smc->sk.sk_socket)
- return;
-
- /* data available */
- if ((conn->local_rx_ctrl.prod_flags.write_blocked) ||
- (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req))
- smc_tx_consumer_update(conn);
}
/* called under tasklet context */
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 149ceda1b088..ab240b37ad11 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -214,7 +214,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
-bool smc_cdc_tx_has_pending(struct smc_connection *conn);
int smc_cdc_init(void) __init;
#endif /* SMC_CDC_H */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 1800e16b2a02..8ac51583a063 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -22,6 +22,54 @@
#include "smc_clc.h"
#include "smc_ib.h"
+/* check if received message has a correct header length and contains valid
+ * heading and trailing eyecatchers
+ */
+static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
+{
+ struct smc_clc_msg_proposal_prefix *pclc_prfx;
+ struct smc_clc_msg_accept_confirm *clc;
+ struct smc_clc_msg_proposal *pclc;
+ struct smc_clc_msg_decline *dclc;
+ struct smc_clc_msg_trail *trl;
+
+ if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+ return false;
+ switch (clcm->type) {
+ case SMC_CLC_PROPOSAL:
+ pclc = (struct smc_clc_msg_proposal *)clcm;
+ pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ if (ntohs(pclc->hdr.length) !=
+ sizeof(*pclc) + ntohs(pclc->iparea_offset) +
+ sizeof(*pclc_prfx) +
+ pclc_prfx->ipv6_prefixes_cnt *
+ sizeof(struct smc_clc_ipv6_prefix) +
+ sizeof(*trl))
+ return false;
+ trl = (struct smc_clc_msg_trail *)
+ ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
+ break;
+ case SMC_CLC_ACCEPT:
+ case SMC_CLC_CONFIRM:
+ clc = (struct smc_clc_msg_accept_confirm *)clcm;
+ if (ntohs(clc->hdr.length) != sizeof(*clc))
+ return false;
+ trl = &clc->trl;
+ break;
+ case SMC_CLC_DECLINE:
+ dclc = (struct smc_clc_msg_decline *)clcm;
+ if (ntohs(dclc->hdr.length) != sizeof(*dclc))
+ return false;
+ trl = &dclc->trl;
+ break;
+ default:
+ return false;
+ }
+ if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+ return false;
+ return true;
+}
+
/* Wait for data on the tcp-socket, analyze received data
* Returns:
* 0 if success and it was not a decline that we received.
@@ -35,7 +83,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
struct smc_clc_msg_hdr *clcm = buf;
struct msghdr msg = {NULL, 0};
int reason_code = 0;
- struct kvec vec;
+ struct kvec vec = {buf, buflen};
int len, datlen;
int krflags;
@@ -43,12 +91,15 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
* so we don't consume any subsequent CLC message or payload data
* in the TCP byte stream
*/
- vec.iov_base = buf;
- vec.iov_len = buflen;
+ /*
+ * Caller must make sure that buflen is no less than
+ * sizeof(struct smc_clc_msg_hdr)
+ */
krflags = MSG_PEEK | MSG_WAITALL;
smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
- len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1,
- sizeof(struct smc_clc_msg_hdr), krflags);
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
+ sizeof(struct smc_clc_msg_hdr));
+ len = sock_recvmsg(smc->clcsock, &msg, krflags);
if (signal_pending(current)) {
reason_code = -EINTR;
clc_sk->sk_err = EINTR;
@@ -72,9 +123,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
datlen = ntohs(clcm->length);
if ((len < sizeof(struct smc_clc_msg_hdr)) ||
- (datlen < sizeof(struct smc_clc_msg_decline)) ||
- (datlen > sizeof(struct smc_clc_msg_accept_confirm)) ||
- memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
+ (datlen > buflen) ||
((clcm->type != SMC_CLC_DECLINE) &&
(clcm->type != expected_type))) {
smc->sk.sk_err = EPROTO;
@@ -83,13 +132,12 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
/* receive the complete CLC message */
- vec.iov_base = buf;
- vec.iov_len = buflen;
memset(&msg, 0, sizeof(struct msghdr));
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen);
krflags = MSG_WAITALL;
smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
- len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags);
- if (len < datlen) {
+ len = sock_recvmsg(smc->clcsock, &msg, krflags);
+ if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
smc->sk.sk_err = EPROTO;
reason_code = -EPROTO;
goto out;
@@ -133,7 +181,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
smc->sk.sk_err = EPROTO;
if (len < 0)
smc->sk.sk_err = -len;
- return len;
+ return sock_error(&smc->sk);
}
/* send CLC PROPOSAL message across internal TCP socket */
@@ -141,33 +189,43 @@ int smc_clc_send_proposal(struct smc_sock *smc,
struct smc_ib_device *smcibdev,
u8 ibport)
{
+ struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_msg_proposal pclc;
+ struct smc_clc_msg_trail trl;
int reason_code = 0;
+ struct kvec vec[3];
struct msghdr msg;
- struct kvec vec;
- int len, rc;
+ int len, plen, rc;
/* send SMC Proposal CLC message */
+ plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
memset(&pclc, 0, sizeof(pclc));
memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
pclc.hdr.type = SMC_CLC_PROPOSAL;
- pclc.hdr.length = htons(sizeof(pclc));
+ pclc.hdr.length = htons(plen);
pclc.hdr.version = SMC_CLC_V1; /* SMC version */
memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
+ pclc.iparea_offset = htons(0);
+ memset(&pclc_prfx, 0, sizeof(pclc_prfx));
/* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet,
- &pclc.prefix_len);
+ rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
+ &pclc_prfx.prefix_len);
if (rc)
return SMC_CLC_DECL_CNFERR; /* configuration error */
- memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+ pclc_prfx.ipv6_prefixes_cnt = 0;
+ memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
memset(&msg, 0, sizeof(msg));
- vec.iov_base = &pclc;
- vec.iov_len = sizeof(pclc);
+ vec[0].iov_base = &pclc;
+ vec[0].iov_len = sizeof(pclc);
+ vec[1].iov_base = &pclc_prfx;
+ vec[1].iov_len = sizeof(pclc_prfx);
+ vec[2].iov_base = &trl;
+ vec[2].iov_len = sizeof(trl);
/* due to the few bytes needed for clc-handshake this cannot block */
- len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc));
+ len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
if (len < sizeof(pclc)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 12a9af1539a2..c145a0f36a68 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -44,7 +44,7 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 version : 4,
flag : 1,
- rsvd : 3;
+ rsvd : 3;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 rsvd : 3,
flag : 1,
@@ -62,17 +62,31 @@ struct smc_clc_msg_local { /* header2 of clc messages */
u8 mac[6]; /* mac of ib_device port */
};
-struct smc_clc_msg_proposal { /* clc proposal message */
- struct smc_clc_msg_hdr hdr;
- struct smc_clc_msg_local lcl;
- __be16 iparea_offset; /* offset to IP address information area */
+struct smc_clc_ipv6_prefix {
+ u8 prefix[4];
+ u8 prefix_len;
+} __packed;
+
+struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
__be32 outgoing_subnet; /* subnet mask */
u8 prefix_len; /* number of significant bits in mask */
u8 reserved[2];
u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */
- struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
} __aligned(4);
+struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
+ struct smc_clc_msg_hdr hdr;
+ struct smc_clc_msg_local lcl;
+ __be16 iparea_offset; /* offset to IP address information area */
+} __aligned(4);
+
+#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28
+#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix))
+#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \
+ SMC_CLC_PROPOSAL_MAX_OFFSET + \
+ SMC_CLC_PROPOSAL_MAX_PREFIX + \
+ sizeof(struct smc_clc_msg_trail))
+
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
struct smc_clc_msg_local lcl;
@@ -102,6 +116,14 @@ struct smc_clc_msg_decline { /* clc decline message */
struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
} __aligned(4);
+/* determine start of the prefix area within the proposal message */
+static inline struct smc_clc_msg_proposal_prefix *
+smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
+{
+ return (struct smc_clc_msg_proposal_prefix *)
+ ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
+}
+
struct smc_sock;
struct smc_ib_device;
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 48615d2ac4aa..e339c0186dcf 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -19,7 +19,7 @@
#include "smc_cdc.h"
#include "smc_close.h"
-#define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ)
+#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ)
static void smc_close_cleanup_listen(struct sock *parent)
{
@@ -30,23 +30,24 @@ static void smc_close_cleanup_listen(struct sock *parent)
smc_close_non_accepted(sk);
}
-static void smc_close_wait_tx_pends(struct smc_sock *smc)
+static void smc_close_wait_listen_clcsock(struct smc_sock *smc)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = &smc->sk;
signed long timeout;
- timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
+ timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME;
add_wait_queue(sk_sleep(sk), &wait);
- while (!signal_pending(current) && timeout) {
- int rc;
-
- rc = sk_wait_event(sk, &timeout,
- !smc_cdc_tx_has_pending(&smc->conn),
- &wait);
- if (rc)
+ do {
+ release_sock(sk);
+ if (smc->clcsock)
+ timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE,
+ timeout);
+ sched_annotate_sleep();
+ lock_sock(sk);
+ if (!smc->clcsock)
break;
- }
+ } while (timeout);
remove_wait_queue(sk_sleep(sk), &wait);
}
@@ -111,58 +112,63 @@ static int smc_close_abort(struct smc_connection *conn)
}
/* terminate smc socket abnormally - active abort
- * RDMA communication no longer possible
+ * link group is terminated, i.e. RDMA communication no longer possible
*/
-void smc_close_active_abort(struct smc_sock *smc)
+static void smc_close_active_abort(struct smc_sock *smc)
{
+ struct sock *sk = &smc->sk;
+
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
- smc->sk.sk_err = ECONNABORTED;
+ sk->sk_err = ECONNABORTED;
if (smc->clcsock && smc->clcsock->sk) {
smc->clcsock->sk->sk_err = ECONNABORTED;
smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
}
- switch (smc->sk.sk_state) {
+ switch (sk->sk_state) {
case SMC_INIT:
case SMC_ACTIVE:
- smc->sk.sk_state = SMC_PEERABORTWAIT;
+ sk->sk_state = SMC_PEERABORTWAIT;
+ release_sock(sk);
+ cancel_delayed_work_sync(&smc->conn.tx_work);
+ lock_sock(sk);
+ sock_put(sk); /* passive closing */
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
if (!smc_cdc_rxed_any_close(&smc->conn))
- smc->sk.sk_state = SMC_PEERABORTWAIT;
+ sk->sk_state = SMC_PEERABORTWAIT;
else
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
+ release_sock(sk);
+ cancel_delayed_work_sync(&smc->conn.tx_work);
+ lock_sock(sk);
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (!txflags->peer_conn_closed) {
- smc->sk.sk_state = SMC_PEERABORTWAIT;
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
+ /* just SHUTDOWN_SEND done */
+ sk->sk_state = SMC_PEERABORTWAIT;
} else {
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
}
+ sock_put(sk); /* passive closing */
break;
case SMC_PROCESSABORT:
case SMC_APPFINCLOSEWAIT:
- if (!txflags->peer_conn_closed) {
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
- }
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERFINCLOSEWAIT:
+ sock_put(sk); /* passive closing */
+ break;
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
break;
}
- sock_set_flag(&smc->sk, SOCK_DEAD);
- smc->sk.sk_state_change(&smc->sk);
+ sock_set_flag(sk, SOCK_DEAD);
+ sk->sk_state_change(sk);
}
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
@@ -185,13 +191,11 @@ int smc_close_active(struct smc_sock *smc)
0 : sock_flag(sk, SOCK_LINGER) ?
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
-again:
old_state = sk->sk_state;
- switch (old_state) {
+again:
+ switch (sk->sk_state) {
case SMC_INIT:
sk->sk_state = SMC_CLOSED;
- if (smc->smc_listen_work.func)
- cancel_work_sync(&smc->smc_listen_work);
break;
case SMC_LISTEN:
sk->sk_state = SMC_CLOSED;
@@ -200,11 +204,9 @@ again:
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
/* wake up kernel_accept of smc_tcp_listen_worker */
smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
+ smc_close_wait_listen_clcsock(smc);
}
- release_sock(sk);
smc_close_cleanup_listen(sk);
- cancel_work_sync(&smc->smc_listen_work);
- lock_sock(sk);
break;
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
@@ -214,6 +216,8 @@ again:
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
} else {
/* peer event has changed the state */
@@ -226,9 +230,10 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
}
sk->sk_state = SMC_CLOSED;
- smc_close_wait_tx_pends(smc);
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
@@ -237,19 +242,21 @@ again:
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
- if (sk->sk_err != ECONNABORTED) {
- /* confirm close from peer */
- rc = smc_close_final(conn);
- if (rc)
- break;
- }
- if (smc_cdc_rxed_any_close(conn))
+ if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
+ sk->sk_state != SMC_APPCLOSEWAIT2)
+ goto again;
+ /* confirm close from peer */
+ rc = smc_close_final(conn);
+ if (rc)
+ break;
+ if (smc_cdc_rxed_any_close(conn)) {
/* peer has closed the socket already */
sk->sk_state = SMC_CLOSED;
- else
+ sock_put(sk); /* postponed passive closing */
+ } else {
/* peer has just issued a shutdown write */
sk->sk_state = SMC_PEERFINCLOSEWAIT;
- smc_close_wait_tx_pends(smc);
+ }
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
@@ -257,6 +264,8 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
}
/* peer sending PeerConnectionClosed will cause transition */
break;
@@ -264,12 +273,8 @@ again:
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
- release_sock(sk);
- cancel_delayed_work_sync(&conn->tx_work);
- lock_sock(sk);
smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
- smc_close_wait_tx_pends(smc);
break;
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
@@ -278,7 +283,7 @@ again:
}
if (old_state != sk->sk_state)
- sk->sk_state_change(&smc->sk);
+ sk->sk_state_change(sk);
return rc;
}
@@ -289,37 +294,42 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
struct sock *sk = &smc->sk;
switch (sk->sk_state) {
+ case SMC_INIT:
case SMC_ACTIVE:
- case SMC_APPFINCLOSEWAIT:
case SMC_APPCLOSEWAIT1:
- case SMC_APPCLOSEWAIT2:
- smc_close_abort(&smc->conn);
+ sk->sk_state = SMC_PROCESSABORT;
+ sock_put(sk); /* passive closing */
+ break;
+ case SMC_APPFINCLOSEWAIT:
sk->sk_state = SMC_PROCESSABORT;
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (txflags->peer_done_writing &&
- !smc_close_sent_any_close(&smc->conn)) {
+ !smc_close_sent_any_close(&smc->conn))
/* just shutdown, but not yet closed locally */
- smc_close_abort(&smc->conn);
sk->sk_state = SMC_PROCESSABORT;
- } else {
+ else
sk->sk_state = SMC_CLOSED;
- }
+ sock_put(sk); /* passive closing */
break;
+ case SMC_APPCLOSEWAIT2:
case SMC_PEERFINCLOSEWAIT:
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ break;
case SMC_PEERABORTWAIT:
sk->sk_state = SMC_CLOSED;
break;
- case SMC_INIT:
case SMC_PROCESSABORT:
/* nothing to do, add tracing in future patch */
break;
}
}
-/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
- * or peer_done_writing.
+/* Either some kind of closing has been received: peer_conn_closed,
+ * peer_conn_abort, or peer_done_writing
+ * or the link group of the connection terminates abnormally.
*/
static void smc_close_passive_work(struct work_struct *work)
{
@@ -331,7 +341,7 @@ static void smc_close_passive_work(struct work_struct *work)
struct sock *sk = &smc->sk;
int old_state;
- lock_sock(&smc->sk);
+ lock_sock(sk);
old_state = sk->sk_state;
if (!conn->alert_token_local) {
@@ -340,23 +350,32 @@ static void smc_close_passive_work(struct work_struct *work)
goto wakeup;
}
- rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
+ rxflags = &conn->local_rx_ctrl.conn_state_flags;
if (rxflags->peer_conn_abort) {
+ /* peer has not received all data */
smc_close_passive_abort_received(smc);
+ release_sock(&smc->sk);
+ cancel_delayed_work_sync(&conn->tx_work);
+ lock_sock(&smc->sk);
goto wakeup;
}
switch (sk->sk_state) {
case SMC_INIT:
- if (atomic_read(&smc->conn.bytes_to_rcv) ||
+ if (atomic_read(&conn->bytes_to_rcv) ||
(rxflags->peer_done_writing &&
- !smc_cdc_rxed_any_close(conn)))
+ !smc_cdc_rxed_any_close(conn))) {
sk->sk_state = SMC_APPCLOSEWAIT1;
- else
+ } else {
sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ }
break;
case SMC_ACTIVE:
sk->sk_state = SMC_APPCLOSEWAIT1;
+ /* postpone sock_put() for passive closing to cover
+ * received SEND_SHUTDOWN as well
+ */
break;
case SMC_PEERCLOSEWAIT1:
if (rxflags->peer_done_writing)
@@ -364,8 +383,7 @@ static void smc_close_passive_work(struct work_struct *work)
/* fall through */
/* to check for closing */
case SMC_PEERCLOSEWAIT2:
- case SMC_PEERFINCLOSEWAIT:
- if (!smc_cdc_rxed_any_close(&smc->conn))
+ if (!smc_cdc_rxed_any_close(conn))
break;
if (sock_flag(sk, SOCK_DEAD) &&
smc_close_sent_any_close(conn)) {
@@ -375,9 +393,20 @@ static void smc_close_passive_work(struct work_struct *work)
/* just shutdown, but not yet closed locally */
sk->sk_state = SMC_APPFINCLOSEWAIT;
}
+ sock_put(sk); /* passive closing */
+ break;
+ case SMC_PEERFINCLOSEWAIT:
+ if (smc_cdc_rxed_any_close(conn)) {
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ }
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
+ /* postpone sock_put() for passive closing to cover
+ * received SEND_SHUTDOWN as well
+ */
+ break;
case SMC_APPFINCLOSEWAIT:
case SMC_PEERABORTWAIT:
case SMC_PROCESSABORT:
@@ -393,23 +422,11 @@ wakeup:
if (old_state != sk->sk_state) {
sk->sk_state_change(sk);
if ((sk->sk_state == SMC_CLOSED) &&
- (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
+ (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
+ smc_conn_free(conn);
}
- release_sock(&smc->sk);
-}
-
-void smc_close_sock_put_work(struct work_struct *work)
-{
- struct smc_sock *smc = container_of(to_delayed_work(work),
- struct smc_sock,
- sock_put_work);
-
- smc->sk.sk_prot->unhash(&smc->sk);
- sock_put(&smc->sk);
+ release_sock(sk);
+ sock_put(sk); /* sock_hold done by schedulers of close_work */
}
int smc_close_shutdown_write(struct smc_sock *smc)
@@ -424,20 +441,21 @@ int smc_close_shutdown_write(struct smc_sock *smc)
0 : sock_flag(sk, SOCK_LINGER) ?
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
-again:
old_state = sk->sk_state;
- switch (old_state) {
+again:
+ switch (sk->sk_state) {
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
+ if (sk->sk_state != SMC_ACTIVE)
+ goto again;
/* send close wr request */
rc = smc_close_wr(conn);
- if (sk->sk_state == SMC_ACTIVE)
- sk->sk_state = SMC_PEERCLOSEWAIT1;
- else
- goto again;
+ if (rc)
+ break;
+ sk->sk_state = SMC_PEERCLOSEWAIT1;
break;
case SMC_APPCLOSEWAIT1:
/* passive close */
@@ -446,8 +464,12 @@ again:
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
+ if (sk->sk_state != SMC_APPCLOSEWAIT1)
+ goto again;
/* confirm close from peer */
rc = smc_close_wr(conn);
+ if (rc)
+ break;
sk->sk_state = SMC_APPCLOSEWAIT2;
break;
case SMC_APPCLOSEWAIT2:
@@ -462,7 +484,7 @@ again:
}
if (old_state != sk->sk_state)
- sk->sk_state_change(&smc->sk);
+ sk->sk_state_change(sk);
return rc;
}
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index ed82506b1b0a..19eb6a211c23 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -20,9 +20,7 @@
#define SMC_CLOSE_SOCK_PUT_DELAY HZ
void smc_close_wake_tx_prepared(struct smc_sock *smc);
-void smc_close_active_abort(struct smc_sock *smc);
int smc_close_active(struct smc_sock *smc);
-void smc_close_sock_put_work(struct work_struct *work);
int smc_close_shutdown_write(struct smc_sock *smc);
void smc_close_init(struct smc_sock *smc);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 94f21116dac5..2424c7100aaf 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -128,6 +128,8 @@ static void smc_lgr_free_work(struct work_struct *work)
bool conns;
spin_lock_bh(&smc_lgr_list.lock);
+ if (list_empty(&lgr->list))
+ goto free;
read_lock_bh(&lgr->conns_lock);
conns = RB_EMPTY_ROOT(&lgr->conns_all);
read_unlock_bh(&lgr->conns_lock);
@@ -136,6 +138,7 @@ static void smc_lgr_free_work(struct work_struct *work)
return;
}
list_del_init(&lgr->list); /* remove from smc_lgr_list */
+free:
spin_unlock_bh(&smc_lgr_list.lock);
smc_lgr_free(lgr);
}
@@ -231,9 +234,7 @@ static void smc_buf_unuse(struct smc_connection *conn)
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
-
- if (!lgr)
+ if (!conn->lgr)
return;
smc_cdc_tx_dismiss_slots(conn);
smc_lgr_unregister_conn(conn);
@@ -327,13 +328,17 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
while (node) {
conn = rb_entry(node, struct smc_connection, alert_node);
smc = container_of(conn, struct smc_sock, conn);
- sock_hold(&smc->sk);
+ sock_hold(&smc->sk); /* sock_put in close work */
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
__smc_lgr_unregister_conn(conn);
- schedule_work(&conn->close_work);
- sock_put(&smc->sk);
+ write_unlock_bh(&lgr->conns_lock);
+ if (!schedule_work(&conn->close_work))
+ sock_put(&smc->sk);
+ write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
}
write_unlock_bh(&lgr->conns_lock);
+ wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
}
/* Determine vlan of internal TCP socket.
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index d2d01cf70224..427b91c1c964 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
goto errout;
- if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) {
+ if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
+ smc->conn.alert_token_local) {
struct smc_connection *conn = &smc->conn;
struct smc_diag_conninfo cinfo = {
.token = conn->alert_token_local,
@@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout;
}
- if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) {
+ if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
+ !list_empty(&smc->conn.lgr->list)) {
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
.lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 90f1a7f9085c..2a8957bd6d38 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -141,6 +141,17 @@ out:
return rc;
}
+static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link_group *lgr, *l;
+
+ list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
+ if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
+ lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
+ smc_lgr_terminate(lgr);
+ }
+}
+
/* process context wrapper for might_sleep smc_ib_remember_port_attr */
static void smc_ib_port_event_work(struct work_struct *work)
{
@@ -151,6 +162,8 @@ static void smc_ib_port_event_work(struct work_struct *work)
for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
smc_ib_remember_port_attr(smcibdev, port_idx + 1);
clear_bit(port_idx, &smcibdev->port_event_mask);
+ if (!smc_ib_port_active(smcibdev, port_idx + 1))
+ smc_ib_port_terminate(smcibdev, port_idx + 1);
}
}
@@ -165,15 +178,7 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
switch (ibevent->event) {
case IB_EVENT_PORT_ERR:
- port_idx = ibevent->element.port_num - 1;
- set_bit(port_idx, &smcibdev->port_event_mask);
- schedule_work(&smcibdev->port_event_work);
- /* fall through */
case IB_EVENT_DEVICE_FATAL:
- /* tbd in follow-on patch:
- * abnormal close of corresponding connections
- */
- break;
case IB_EVENT_PORT_ACTIVE:
port_idx = ibevent->element.port_num - 1;
set_bit(port_idx, &smcibdev->port_event_mask);
@@ -186,7 +191,8 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
void smc_ib_dealloc_protection_domain(struct smc_link *lnk)
{
- ib_dealloc_pd(lnk->roce_pd);
+ if (lnk->roce_pd)
+ ib_dealloc_pd(lnk->roce_pd);
lnk->roce_pd = NULL;
}
@@ -203,14 +209,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
{
+ struct smc_ib_device *smcibdev =
+ (struct smc_ib_device *)ibevent->device;
+ u8 port_idx;
+
switch (ibevent->event) {
case IB_EVENT_DEVICE_FATAL:
case IB_EVENT_GID_CHANGE:
case IB_EVENT_PORT_ERR:
case IB_EVENT_QP_ACCESS_ERR:
- /* tbd in follow-on patch:
- * abnormal close of corresponding connections
- */
+ port_idx = ibevent->element.port_num - 1;
+ set_bit(port_idx, &smcibdev->port_event_mask);
+ schedule_work(&smcibdev->port_event_work);
break;
default:
break;
@@ -219,7 +229,8 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
void smc_ib_destroy_queue_pair(struct smc_link *lnk)
{
- ib_destroy_qp(lnk->roce_qp);
+ if (lnk->roce_qp)
+ ib_destroy_qp(lnk->roce_qp);
lnk->roce_qp = NULL;
}
@@ -462,6 +473,7 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
{
if (!smcibdev->initialized)
return;
+ smcibdev->initialized = 0;
smc_wr_remove_dev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
ib_destroy_cq(smcibdev->roce_cq_recv);
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index cbf58637ee14..9dc392ca06bf 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -65,7 +65,6 @@ static int smc_rx_wait_data(struct smc_sock *smc, long *timeo)
rc = sk_wait_event(sk, timeo,
sk->sk_err ||
sk->sk_shutdown & RCV_SHUTDOWN ||
- sock_flag(sk, SOCK_DONE) ||
atomic_read(&conn->bytes_to_rcv) ||
smc_cdc_rxed_any_close_or_senddone(conn),
&wait);
@@ -116,7 +115,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
if (read_done) {
if (sk->sk_err ||
sk->sk_state == SMC_CLOSED ||
- (sk->sk_shutdown & RCV_SHUTDOWN) ||
+ sk->sk_shutdown & RCV_SHUTDOWN ||
!timeo ||
signal_pending(current) ||
smc_cdc_rxed_any_close_or_senddone(conn) ||
@@ -124,8 +123,6 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
peer_conn_abort)
break;
} else {
- if (sock_flag(sk, SOCK_DONE))
- break;
if (sk->sk_err) {
read_done = sock_error(sk);
break;
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index c48dc2d5fd3a..838bce20c361 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -86,7 +86,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
rc = -EPIPE;
break;
}
- if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
+ if (smc_cdc_rxed_any_close(conn)) {
rc = -ECONNRESET;
break;
}
@@ -104,14 +104,12 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
if (atomic_read(&conn->sndbuf_space))
break; /* at least 1 byte of free space available */
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- sk->sk_write_pending++;
sk_wait_event(sk, &timeo,
sk->sk_err ||
(sk->sk_shutdown & SEND_SHUTDOWN) ||
- smc_cdc_rxed_any_close_or_senddone(conn) ||
+ smc_cdc_rxed_any_close(conn) ||
atomic_read(&conn->sndbuf_space),
&wait);
- sk->sk_write_pending--;
}
remove_wait_queue(sk_sleep(sk), &wait);
return rc;
@@ -250,8 +248,10 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
peer_rmbe_offset;
rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr);
- if (rc)
+ if (rc) {
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+ smc_lgr_terminate(lgr);
+ }
return rc;
}
@@ -408,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
goto out_unlock;
}
rc = 0;
- schedule_delayed_work(&conn->tx_work,
- SMC_TX_WORK_DELAY);
+ if (conn->alert_token_local) /* connection healthy */
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
@@ -440,19 +441,24 @@ static void smc_tx_work(struct work_struct *work)
int rc;
lock_sock(&smc->sk);
+ if (smc->sk.sk_err ||
+ !conn->alert_token_local ||
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ goto out;
+
rc = smc_tx_sndbuf_nonempty(conn);
if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
!atomic_read(&conn->bytes_to_rcv))
conn->local_rx_ctrl.prod_flags.write_blocked = 0;
+
+out:
release_sock(&smc->sk);
}
void smc_tx_consumer_update(struct smc_connection *conn)
{
union smc_host_cursor cfed, cons;
- struct smc_cdc_tx_pend *pend;
- struct smc_wr_buf *wr_buf;
- int to_confirm, rc;
+ int to_confirm;
smc_curs_write(&cons,
smc_curs_read(&conn->local_tx_ctrl.cons, conn),
@@ -466,10 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
- rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
- if (!rc)
- rc = smc_cdc_msg_send(conn, wr_buf, pend);
- if (rc < 0) {
+ if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
+ conn->alert_token_local) { /* connection healthy */
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
return;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index de4537f66832..1b8af23e6e2b 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -122,6 +122,7 @@ static void smc_wr_tx_tasklet_fn(unsigned long data)
again:
polled++;
do {
+ memset(&wc, 0, sizeof(wc));
rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
if (polled == 1) {
ib_req_notify_cq(dev->roce_cq_send,
@@ -173,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv **wr_pend_priv)
{
struct smc_wr_tx_pend *wr_pend;
+ u32 idx = link->wr_tx_cnt;
struct ib_send_wr *wr_ib;
u64 wr_id;
- u32 idx;
int rc;
*wr_buf = NULL;
@@ -185,21 +186,20 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
if (rc)
return rc;
} else {
- rc = wait_event_interruptible_timeout(
+ struct smc_link_group *lgr;
+
+ lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+ rc = wait_event_timeout(
link->wr_tx_wait,
+ list_empty(&lgr->list) || /* lgr terminated */
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
/* timeout - terminate connections */
- struct smc_link_group *lgr;
-
- lgr = container_of(link, struct smc_link_group,
- lnk[SMC_SINGLE_LINK]);
smc_lgr_terminate(lgr);
return -EPIPE;
}
- if (rc == -ERESTARTSYS)
- return -EINTR;
if (idx == link->wr_tx_cnt)
return -EPIPE;
}
@@ -249,8 +249,14 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
pend = container_of(priv, struct smc_wr_tx_pend, priv);
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
&failed_wr);
- if (rc)
+ if (rc) {
+ struct smc_link_group *lgr =
+ container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
smc_wr_tx_put_slot(link, priv);
+ smc_lgr_terminate(lgr);
+ }
return rc;
}
@@ -300,18 +306,18 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
return rc;
}
-void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
+void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
unsigned long data)
{
struct smc_wr_tx_pend_priv *tx_pend;
- struct smc_wr_rx_hdr *wr_rx;
+ struct smc_wr_rx_hdr *wr_tx;
int i;
for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
- if (wr_rx->type != wr_rx_hdr_type)
+ wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
+ if (wr_tx->type != wr_tx_hdr_type)
continue;
tx_pend = &link->wr_tx_pends[i].priv;
if (filter(tx_pend, data))
@@ -319,24 +325,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
}
}
-bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter, unsigned long data)
-{
- struct smc_wr_tx_pend_priv *tx_pend;
- struct smc_wr_rx_hdr *wr_rx;
- int i;
-
- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
- if (wr_rx->type != wr_rx_hdr_type)
- continue;
- tx_pend = &link->wr_tx_pends[i].priv;
- if (filter(tx_pend, data))
- return true;
- }
- return false;
-}
-
/****************************** receive queue ********************************/
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 2acf12b06063..ef0c3494c9cb 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -93,8 +93,6 @@ int smc_wr_tx_put_slot(struct smc_link *link,
int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
-bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter, unsigned long data);
void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
diff --git a/net/socket.c b/net/socket.c
index 42d8e9c9ccd5..a93c99b518ca 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -118,7 +118,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
static int sock_mmap(struct file *file, struct vm_area_struct *vma);
static int sock_close(struct inode *inode, struct file *file);
-static unsigned int sock_poll(struct file *file,
+static __poll_t sock_poll(struct file *file,
struct poll_table_struct *wait);
static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
@@ -163,12 +163,6 @@ static DEFINE_SPINLOCK(net_family_lock);
static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
/*
- * Statistics counters of the socket lists
- */
-
-static DEFINE_PER_CPU(int, sockets_in_use);
-
-/*
* Support routines.
* Move socket addresses back and forth across the kernel/user
* divide and look after the messy bits.
@@ -406,8 +400,10 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
name.len = strlen(name.name);
}
path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
- if (unlikely(!path.dentry))
+ if (unlikely(!path.dentry)) {
+ sock_release(sock);
return ERR_PTR(-ENOMEM);
+ }
path.mnt = mntget(sock_mnt);
d_instantiate(path.dentry, SOCK_INODE(sock));
@@ -415,9 +411,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
&socket_file_ops);
if (IS_ERR(file)) {
- /* drop dentry, keep inode */
+ /* drop dentry, keep inode for a bit */
ihold(d_inode(path.dentry));
path_put(&path);
+ /* ... and now kill it properly */
+ sock_release(sock);
return file;
}
@@ -432,8 +430,10 @@ static int sock_map_fd(struct socket *sock, int flags)
{
struct file *newfile;
int fd = get_unused_fd_flags(flags);
- if (unlikely(fd < 0))
+ if (unlikely(fd < 0)) {
+ sock_release(sock);
return fd;
+ }
newfile = sock_alloc_file(sock, flags, NULL);
if (likely(!IS_ERR(newfile))) {
@@ -574,7 +574,6 @@ struct socket *sock_alloc(void)
inode->i_gid = current_fsgid();
inode->i_op = &sockfs_inode_ops;
- this_cpu_add(sockets_in_use, 1);
return sock;
}
EXPORT_SYMBOL(sock_alloc);
@@ -601,7 +600,6 @@ void sock_release(struct socket *sock)
if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
pr_err("%s: fasync list not empty!\n", __func__);
- this_cpu_sub(sockets_in_use, 1);
if (!sock->file) {
iput(SOCK_INODE(sock));
return;
@@ -963,9 +961,28 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
* If this ioctl is unknown try to hand it down
* to the NIC driver.
*/
- if (err == -ENOIOCTLCMD)
- err = dev_ioctl(net, cmd, argp);
+ if (err != -ENOIOCTLCMD)
+ return err;
+ if (cmd == SIOCGIFCONF) {
+ struct ifconf ifc;
+ if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
+ return -EFAULT;
+ rtnl_lock();
+ err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
+ rtnl_unlock();
+ if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
+ err = -EFAULT;
+ } else {
+ struct ifreq ifr;
+ bool need_copyout;
+ if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+ if (!err && need_copyout)
+ if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ }
return err;
}
@@ -990,12 +1007,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
sock = file->private_data;
sk = sock->sk;
net = sock_net(sk);
- if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
- err = dev_ioctl(net, cmd, argp);
+ if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
+ struct ifreq ifr;
+ bool need_copyout;
+ if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+ if (!err && need_copyout)
+ if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
} else
#ifdef CONFIG_WEXT_CORE
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
- err = dev_ioctl(net, cmd, argp);
+ err = wext_handle_ioctl(net, cmd, argp);
} else
#endif
switch (cmd) {
@@ -1091,9 +1115,9 @@ out_release:
EXPORT_SYMBOL(sock_create_lite);
/* No kernel lock held - perfect */
-static unsigned int sock_poll(struct file *file, poll_table *wait)
+static __poll_t sock_poll(struct file *file, poll_table *wait)
{
- unsigned int busy_flag = 0;
+ __poll_t busy_flag = 0;
struct socket *sock;
/*
@@ -1330,19 +1354,9 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
retval = sock_create(family, type, protocol, &sock);
if (retval < 0)
- goto out;
-
- retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
- if (retval < 0)
- goto out_release;
-
-out:
- /* It may be already another descriptor 8) Not kernel problem. */
- return retval;
+ return retval;
-out_release:
- sock_release(sock);
- return retval;
+ return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}
/*
@@ -1366,87 +1380,72 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
/*
+ * reserve descriptors and make sure we won't fail
+ * to return them to userland.
+ */
+ fd1 = get_unused_fd_flags(flags);
+ if (unlikely(fd1 < 0))
+ return fd1;
+
+ fd2 = get_unused_fd_flags(flags);
+ if (unlikely(fd2 < 0)) {
+ put_unused_fd(fd1);
+ return fd2;
+ }
+
+ err = put_user(fd1, &usockvec[0]);
+ if (err)
+ goto out;
+
+ err = put_user(fd2, &usockvec[1]);
+ if (err)
+ goto out;
+
+ /*
* Obtain the first socket and check if the underlying protocol
* supports the socketpair call.
*/
err = sock_create(family, type, protocol, &sock1);
- if (err < 0)
+ if (unlikely(err < 0))
goto out;
err = sock_create(family, type, protocol, &sock2);
- if (err < 0)
- goto out_release_1;
-
- err = sock1->ops->socketpair(sock1, sock2);
- if (err < 0)
- goto out_release_both;
-
- fd1 = get_unused_fd_flags(flags);
- if (unlikely(fd1 < 0)) {
- err = fd1;
- goto out_release_both;
+ if (unlikely(err < 0)) {
+ sock_release(sock1);
+ goto out;
}
- fd2 = get_unused_fd_flags(flags);
- if (unlikely(fd2 < 0)) {
- err = fd2;
- goto out_put_unused_1;
+ err = sock1->ops->socketpair(sock1, sock2);
+ if (unlikely(err < 0)) {
+ sock_release(sock2);
+ sock_release(sock1);
+ goto out;
}
newfile1 = sock_alloc_file(sock1, flags, NULL);
if (IS_ERR(newfile1)) {
err = PTR_ERR(newfile1);
- goto out_put_unused_both;
+ sock_release(sock2);
+ goto out;
}
newfile2 = sock_alloc_file(sock2, flags, NULL);
if (IS_ERR(newfile2)) {
err = PTR_ERR(newfile2);
- goto out_fput_1;
+ fput(newfile1);
+ goto out;
}
- err = put_user(fd1, &usockvec[0]);
- if (err)
- goto out_fput_both;
-
- err = put_user(fd2, &usockvec[1]);
- if (err)
- goto out_fput_both;
-
audit_fd_pair(fd1, fd2);
fd_install(fd1, newfile1);
fd_install(fd2, newfile2);
- /* fd1 and fd2 may be already another descriptors.
- * Not kernel problem.
- */
-
return 0;
-out_fput_both:
- fput(newfile2);
- fput(newfile1);
- put_unused_fd(fd2);
- put_unused_fd(fd1);
- goto out;
-
-out_fput_1:
- fput(newfile1);
- put_unused_fd(fd2);
- put_unused_fd(fd1);
- sock_release(sock2);
- goto out;
-
-out_put_unused_both:
+out:
put_unused_fd(fd2);
-out_put_unused_1:
put_unused_fd(fd1);
-out_release_both:
- sock_release(sock2);
-out_release_1:
- sock_release(sock1);
-out:
return err;
}
@@ -1562,7 +1561,6 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
if (IS_ERR(newfile)) {
err = PTR_ERR(newfile);
put_unused_fd(newfd);
- sock_release(newsock);
goto out_put;
}
@@ -2644,17 +2642,8 @@ core_initcall(sock_init); /* early initcall */
#ifdef CONFIG_PROC_FS
void socket_seq_show(struct seq_file *seq)
{
- int cpu;
- int counter = 0;
-
- for_each_possible_cpu(cpu)
- counter += per_cpu(sockets_in_use, cpu);
-
- /* It can be negative, by the way. 8) */
- if (counter < 0)
- counter = 0;
-
- seq_printf(seq, "sockets: used %d\n", counter);
+ seq_printf(seq, "sockets: used %d\n",
+ sock_inuse_get(seq->private));
}
#endif /* CONFIG_PROC_FS */
@@ -2691,89 +2680,25 @@ static int do_siocgstampns(struct net *net, struct socket *sock,
return err;
}
-static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
-{
- struct ifreq __user *uifr;
- int err;
-
- uifr = compat_alloc_user_space(sizeof(struct ifreq));
- if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
- return -EFAULT;
-
- err = dev_ioctl(net, SIOCGIFNAME, uifr);
- if (err)
- return err;
-
- if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
- return -EFAULT;
-
- return 0;
-}
-
-static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
+static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
{
struct compat_ifconf ifc32;
struct ifconf ifc;
- struct ifconf __user *uifc;
- struct compat_ifreq __user *ifr32;
- struct ifreq __user *ifr;
- unsigned int i, j;
int err;
if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
return -EFAULT;
- memset(&ifc, 0, sizeof(ifc));
- if (ifc32.ifcbuf == 0) {
- ifc32.ifc_len = 0;
- ifc.ifc_len = 0;
- ifc.ifc_req = NULL;
- uifc = compat_alloc_user_space(sizeof(struct ifconf));
- } else {
- size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
- sizeof(struct ifreq);
- uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
- ifc.ifc_len = len;
- ifr = ifc.ifc_req = (void __user *)(uifc + 1);
- ifr32 = compat_ptr(ifc32.ifcbuf);
- for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
- if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
- return -EFAULT;
- ifr++;
- ifr32++;
- }
- }
- if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
- return -EFAULT;
+ ifc.ifc_len = ifc32.ifc_len;
+ ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
- err = dev_ioctl(net, SIOCGIFCONF, uifc);
+ rtnl_lock();
+ err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
+ rtnl_unlock();
if (err)
return err;
- if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
- return -EFAULT;
-
- ifr = ifc.ifc_req;
- ifr32 = compat_ptr(ifc32.ifcbuf);
- for (i = 0, j = 0;
- i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
- i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
- if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
- return -EFAULT;
- ifr32++;
- ifr++;
- }
-
- if (ifc32.ifcbuf == 0) {
- /* Translate from 64-bit structure multiple to
- * a 32-bit one.
- */
- i = ifc.ifc_len;
- i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
- ifc32.ifc_len = i;
- } else {
- ifc32.ifc_len = i;
- }
+ ifc32.ifc_len = ifc.ifc_len;
if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
return -EFAULT;
@@ -2784,9 +2709,9 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
{
struct compat_ethtool_rxnfc __user *compat_rxnfc;
bool convert_in = false, convert_out = false;
- size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
- struct ethtool_rxnfc __user *rxnfc;
- struct ifreq __user *ifr;
+ size_t buf_size = 0;
+ struct ethtool_rxnfc __user *rxnfc = NULL;
+ struct ifreq ifr;
u32 rule_cnt = 0, actual_rule_cnt;
u32 ethcmd;
u32 data;
@@ -2823,18 +2748,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
case ETHTOOL_SRXCLSRLDEL:
buf_size += sizeof(struct ethtool_rxnfc);
convert_in = true;
+ rxnfc = compat_alloc_user_space(buf_size);
break;
}
- ifr = compat_alloc_user_space(buf_size);
- rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
-
- if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
+ if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
return -EFAULT;
- if (put_user(convert_in ? rxnfc : compat_ptr(data),
- &ifr->ifr_ifru.ifru_data))
- return -EFAULT;
+ ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
if (convert_in) {
/* We expect there to be holes between fs.m_ext and
@@ -2862,7 +2783,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
return -EFAULT;
}
- ret = dev_ioctl(net, SIOCETHTOOL, ifr);
+ ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
if (ret)
return ret;
@@ -2903,113 +2824,43 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
{
- void __user *uptr;
compat_uptr_t uptr32;
- struct ifreq __user *uifr;
+ struct ifreq ifr;
+ void __user *saved;
+ int err;
- uifr = compat_alloc_user_space(sizeof(*uifr));
- if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
+ if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
return -EFAULT;
if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
return -EFAULT;
- uptr = compat_ptr(uptr32);
-
- if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
- return -EFAULT;
-
- return dev_ioctl(net, SIOCWANDEV, uifr);
-}
+ saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
+ ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
-static int bond_ioctl(struct net *net, unsigned int cmd,
- struct compat_ifreq __user *ifr32)
-{
- struct ifreq kifr;
- mm_segment_t old_fs;
- int err;
-
- switch (cmd) {
- case SIOCBONDENSLAVE:
- case SIOCBONDRELEASE:
- case SIOCBONDSETHWADDR:
- case SIOCBONDCHANGEACTIVE:
- if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
- return -EFAULT;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = dev_ioctl(net, cmd,
- (struct ifreq __user __force *) &kifr);
- set_fs(old_fs);
-
- return err;
- default:
- return -ENOIOCTLCMD;
+ err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
+ if (!err) {
+ ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
+ if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
+ err = -EFAULT;
}
+ return err;
}
/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
struct compat_ifreq __user *u_ifreq32)
{
- struct ifreq __user *u_ifreq64;
- char tmp_buf[IFNAMSIZ];
- void __user *data64;
+ struct ifreq ifreq;
u32 data32;
- if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
- IFNAMSIZ))
+ if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
return -EFAULT;
- if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
+ if (get_user(data32, &u_ifreq32->ifr_data))
return -EFAULT;
- data64 = compat_ptr(data32);
+ ifreq.ifr_data = compat_ptr(data32);
- u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
-
- if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
- IFNAMSIZ))
- return -EFAULT;
- if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
- return -EFAULT;
-
- return dev_ioctl(net, cmd, u_ifreq64);
-}
-
-static int dev_ifsioc(struct net *net, struct socket *sock,
- unsigned int cmd, struct compat_ifreq __user *uifr32)
-{
- struct ifreq __user *uifr;
- int err;
-
- uifr = compat_alloc_user_space(sizeof(*uifr));
- if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
- return -EFAULT;
-
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
-
- if (!err) {
- switch (cmd) {
- case SIOCGIFFLAGS:
- case SIOCGIFMETRIC:
- case SIOCGIFMTU:
- case SIOCGIFMEM:
- case SIOCGIFHWADDR:
- case SIOCGIFINDEX:
- case SIOCGIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCGIFDSTADDR:
- case SIOCGIFNETMASK:
- case SIOCGIFPFLAGS:
- case SIOCGIFTXQLEN:
- case SIOCGMIIPHY:
- case SIOCGMIIREG:
- if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
- err = -EFAULT;
- break;
- }
- }
- return err;
+ return dev_ioctl(net, cmd, &ifreq, NULL);
}
static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
@@ -3017,7 +2868,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
{
struct ifreq ifr;
struct compat_ifmap __user *uifmap32;
- mm_segment_t old_fs;
int err;
uifmap32 = &uifr32->ifr_ifru.ifru_map;
@@ -3031,10 +2881,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
if (err)
return -EFAULT;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
- set_fs(old_fs);
+ err = dev_ioctl(net, cmd, &ifr, NULL);
if (cmd == SIOCGIFMAP && !err) {
err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
@@ -3167,10 +3014,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCSIFBR:
case SIOCGIFBR:
return old_bridge_ioctl(argp);
- case SIOCGIFNAME:
- return dev_ifname32(net, argp);
case SIOCGIFCONF:
- return dev_ifconf(net, argp);
+ return compat_dev_ifconf(net, argp);
case SIOCETHTOOL:
return ethtool_ioctl(net, argp);
case SIOCWANDEV:
@@ -3178,11 +3023,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCGIFMAP:
case SIOCSIFMAP:
return compat_sioc_ifmap(net, cmd, argp);
- case SIOCBONDENSLAVE:
- case SIOCBONDRELEASE:
- case SIOCBONDSETHWADDR:
- case SIOCBONDCHANGEACTIVE:
- return bond_ioctl(net, cmd, argp);
case SIOCADDRT:
case SIOCDELRT:
return routing_ioctl(net, sock, cmd, argp);
@@ -3242,12 +3082,15 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- return dev_ifsioc(net, sock, cmd, argp);
-
case SIOCSARP:
case SIOCGARP:
case SIOCDARP:
case SIOCATMARK:
+ case SIOCBONDENSLAVE:
+ case SIOCBONDRELEASE:
+ case SIOCBONDSETHWADDR:
+ case SIOCBONDCHANGEACTIVE:
+ case SIOCGIFNAME:
return sock_do_ioctl(net, sock, cmd, arg);
}
@@ -3402,19 +3245,6 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(kernel_sendpage_locked);
-int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
-{
- mm_segment_t oldfs = get_fs();
- int err;
-
- set_fs(KERNEL_DS);
- err = sock->ops->ioctl(sock, cmd, arg);
- set_fs(oldfs);
-
- return err;
-}
-EXPORT_SYMBOL(kernel_sock_ioctl);
-
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
{
return sock->ops->shutdown(sock, how);
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index c5fda15ba319..1fdab5c4eda8 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp)
* allows a thread in BH context to safely check if the process
* lock is held. In this case, if the lock is held, queue work.
*/
- if (sock_owned_by_user(strp->sk)) {
+ if (sock_owned_by_user_nocheck(strp->sk)) {
queue_work(strp_wq, &strp->work);
return;
}
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index c4778cae58ef..444380f968f1 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
goto out_free_groups;
creds->cr_group_info->gid[i] = kgid;
}
+ groups_sort(creds->cr_group_info);
return 0;
out_free_groups:
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 73165e9ca5bf..26531193fce4 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -264,7 +264,7 @@ out:
return status;
}
-static struct cache_detail rsi_cache_template = {
+static const struct cache_detail rsi_cache_template = {
.owner = THIS_MODULE,
.hash_size = RSI_HASHMAX,
.name = "auth.rpcsec.init",
@@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd,
goto out;
rsci.cred.cr_group_info->gid[i] = kgid;
}
+ groups_sort(rsci.cred.cr_group_info);
/* mech name */
len = qword_get(&mesg, buf, mlen);
@@ -524,7 +525,7 @@ out:
return status;
}
-static struct cache_detail rsc_cache_template = {
+static const struct cache_detail rsc_cache_template = {
.owner = THIS_MODULE,
.hash_size = RSC_HASHMAX,
.name = "auth.rpcsec.context",
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 79d55d949d9a..aa36dad32db1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -930,10 +930,10 @@ out:
static DECLARE_WAIT_QUEUE_HEAD(queue_wait);
-static unsigned int cache_poll(struct file *filp, poll_table *wait,
+static __poll_t cache_poll(struct file *filp, poll_table *wait,
struct cache_detail *cd)
{
- unsigned int mask;
+ __poll_t mask;
struct cache_reader *rp = filp->private_data;
struct cache_queue *cq;
@@ -1501,7 +1501,7 @@ static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
return cache_write(filp, buf, count, ppos, cd);
}
-static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait)
+static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait)
{
struct cache_detail *cd = PDE_DATA(file_inode(filp));
@@ -1674,7 +1674,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net)
}
EXPORT_SYMBOL_GPL(cache_unregister_net);
-struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net)
+struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net)
{
struct cache_detail *cd;
int i;
@@ -1720,7 +1720,7 @@ static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf,
return cache_write(filp, buf, count, ppos, cd);
}
-static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait)
+static __poll_t cache_poll_pipefs(struct file *filp, poll_table *wait)
{
struct cache_detail *cd = RPC_I(file_inode(filp))->private;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a801da812f86..6e432ecd7f99 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1376,22 +1376,6 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize
EXPORT_SYMBOL_GPL(rpc_setbufsize);
/**
- * rpc_protocol - Get transport protocol number for an RPC client
- * @clnt: RPC client to query
- *
- */
-int rpc_protocol(struct rpc_clnt *clnt)
-{
- int protocol;
-
- rcu_read_lock();
- protocol = rcu_dereference(clnt->cl_xprt)->prot;
- rcu_read_unlock();
- return protocol;
-}
-EXPORT_SYMBOL_GPL(rpc_protocol);
-
-/**
* rpc_net_ns - Get the network namespace for this RPC client
* @clnt: RPC client to query
*
@@ -1841,6 +1825,7 @@ call_bind_status(struct rpc_task *task)
case -ECONNABORTED:
case -ENOTCONN:
case -EHOSTDOWN:
+ case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -ENOBUFS:
@@ -1917,6 +1902,7 @@ call_connect_status(struct rpc_task *task)
/* fall through */
case -ECONNRESET:
case -ECONNABORTED:
+ case -ENETDOWN:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EADDRINUSE:
@@ -2022,6 +2008,7 @@ call_transmit_status(struct rpc_task *task)
*/
case -ECONNREFUSED:
case -EHOSTDOWN:
+ case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPERM:
@@ -2071,6 +2058,7 @@ call_bc_transmit(struct rpc_task *task)
switch (task->tk_status) {
case 0:
/* Success */
+ case -ENETDOWN:
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
@@ -2139,6 +2127,7 @@ call_status(struct rpc_task *task)
task->tk_status = 0;
switch(status) {
case -EHOSTDOWN:
+ case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPERM:
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 7803f3b6aa53..5c4330325787 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -340,12 +340,12 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
return res;
}
-static unsigned int
+static __poll_t
rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
{
struct inode *inode = file_inode(filp);
struct rpc_inode *rpci = RPC_I(inode);
- unsigned int mask = POLLOUT | POLLWRNORM;
+ __poll_t mask = POLLOUT | POLLWRNORM;
poll_wait(filp, &rpci->waitq, wait);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b1b49edd7c4d..896691afbb1a 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -755,22 +755,20 @@ static void __rpc_execute(struct rpc_task *task)
void (*do_action)(struct rpc_task *);
/*
- * Execute any pending callback first.
+ * Perform the next FSM step or a pending callback.
+ *
+ * tk_action may be NULL if the task has been killed.
+ * In particular, note that rpc_killall_tasks may
+ * do this at any time, so beware when dereferencing.
*/
- do_action = task->tk_callback;
- task->tk_callback = NULL;
- if (do_action == NULL) {
- /*
- * Perform the next FSM step.
- * tk_action may be NULL if the task has been killed.
- * In particular, note that rpc_killall_tasks may
- * do this at any time, so beware when dereferencing.
- */
- do_action = task->tk_action;
- if (do_action == NULL)
- break;
+ do_action = task->tk_action;
+ if (task->tk_callback) {
+ do_action = task->tk_callback;
+ task->tk_callback = NULL;
}
- trace_rpc_task_run_action(task->tk_client, task, task->tk_action);
+ if (!do_action)
+ break;
+ trace_rpc_task_run_action(task->tk_client, task, do_action);
do_action(task);
/*
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index f81eaa8e0888..af7f28fb8102 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd,
ug.gi->gid[i] = kgid;
}
+ groups_sort(ug.gi);
ugp = unix_gid_lookup(cd, uid);
if (ugp) {
struct cache_head *ch;
@@ -569,7 +570,7 @@ static int unix_gid_show(struct seq_file *m,
return 0;
}
-static struct cache_detail unix_gid_cache_template = {
+static const struct cache_detail unix_gid_cache_template = {
.owner = THIS_MODULE,
.hash_size = GID_HASHMAX,
.name = "auth.unix.gid",
@@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
cred->cr_group_info->gid[i] = kgid;
}
+ groups_sort(cred->cr_group_info);
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
return SVC_DENIED;
@@ -862,7 +864,7 @@ struct auth_ops svcauth_unix = {
.set_client = svcauth_unix_set_client,
};
-static struct cache_detail ip_map_cache_template = {
+static const struct cache_detail ip_map_cache_template = {
.owner = THIS_MODULE,
.hash_size = IP_HASHMAX,
.name = "auth.unix.ip",
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index ff8e06cd067e..5570719e4787 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -338,8 +338,8 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
rqstp->rq_xprt_hlen = 0;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
- msg.msg_flags);
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen);
+ len = sock_recvmsg(svsk->sk_sock, &msg, msg.msg_flags);
/* If we read a full record, then assume there may be more
* data to read (stream based sockets only!)
*/
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 333b9d697ae5..2436fd1125fc 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -940,8 +940,8 @@ static void xprt_timer(struct rpc_task *task)
if (task->tk_status != -ETIMEDOUT)
return;
- dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
+ trace_xprt_timer(xprt, req->rq_xid, task->tk_status);
if (!req->rq_reply_bytes_recvd) {
if (xprt->ops->timer)
xprt->ops->timer(xprt, task);
@@ -1001,6 +1001,7 @@ void xprt_transmit(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
+ unsigned int connect_cookie;
int status, numreqs;
dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
@@ -1024,6 +1025,7 @@ void xprt_transmit(struct rpc_task *task)
} else if (!req->rq_bytes_sent)
return;
+ connect_cookie = xprt->connect_cookie;
req->rq_xtime = ktime_get();
status = xprt->ops->send_request(task);
trace_xprt_transmit(xprt, req->rq_xid, status);
@@ -1047,20 +1049,28 @@ void xprt_transmit(struct rpc_task *task)
xprt->stat.bklog_u += xprt->backlog.qlen;
xprt->stat.sending_u += xprt->sending.qlen;
xprt->stat.pending_u += xprt->pending.qlen;
+ spin_unlock_bh(&xprt->transport_lock);
- /* Don't race with disconnect */
- if (!xprt_connected(xprt))
- task->tk_status = -ENOTCONN;
- else {
+ req->rq_connect_cookie = connect_cookie;
+ if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
/*
- * Sleep on the pending queue since
- * we're expecting a reply.
+ * Sleep on the pending queue if we're expecting a reply.
+ * The spinlock ensures atomicity between the test of
+ * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
*/
- if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task))
+ spin_lock(&xprt->recv_lock);
+ if (!req->rq_reply_bytes_recvd) {
rpc_sleep_on(&xprt->pending, task, xprt_timer);
- req->rq_connect_cookie = xprt->connect_cookie;
+ /*
+ * Send an extra queue wakeup call if the
+ * connection was dropped in case the call to
+ * rpc_sleep_on() raced.
+ */
+ if (!xprt_connected(xprt))
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
+ }
+ spin_unlock(&xprt->recv_lock);
}
- spin_unlock_bh(&xprt->transport_lock);
}
static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 8b818bb3518a..ed1a4a3065ee 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req))
return PTR_ERR(req);
- __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
DMA_TO_DEVICE, GFP_KERNEL);
@@ -74,21 +73,13 @@ out_fail:
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
unsigned int count)
{
- struct rpcrdma_rep *rep;
int rc = 0;
while (count--) {
- rep = rpcrdma_create_rep(r_xprt);
- if (IS_ERR(rep)) {
- pr_err("RPC: %s: reply buffer alloc failed\n",
- __func__);
- rc = PTR_ERR(rep);
+ rc = rpcrdma_create_rep(r_xprt);
+ if (rc)
break;
- }
-
- rpcrdma_recv_buffer_put(rep);
}
-
return rc;
}
@@ -129,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
rqst->rq_xprt = &r_xprt->rx_xprt;
INIT_LIST_HEAD(&rqst->rq_list);
INIT_LIST_HEAD(&rqst->rq_bc_list);
+ __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
goto out_free;
@@ -148,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
buffer->rb_bc_srv_max_requests = reqs;
request_module("svcrdma");
-
+ trace_xprtrdma_cb_setup(r_xprt, reqs);
return 0;
out_free:
@@ -196,13 +188,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
return maxmsg - RPCRDMA_HDRLEN_MIN;
}
-/**
- * rpcrdma_bc_marshal_reply - Send backwards direction reply
- * @rqst: buffer containing RPC reply data
- *
- * Returns zero on success.
- */
-int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
+static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
@@ -226,7 +212,46 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
&rqst->rq_snd_buf, rpcrdma_noch))
return -EIO;
+
+ trace_xprtrdma_cb_reply(rqst);
+ return 0;
+}
+
+/**
+ * xprt_rdma_bc_send_reply - marshal and send a backchannel reply
+ * @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf
+ *
+ * Caller holds the transport's write lock.
+ *
+ * Returns:
+ * %0 if the RPC message has been sent
+ * %-ENOTCONN if the caller should reconnect and call again
+ * %-EIO if a permanent error occurred and the request was not
+ * sent. Do not try to send this message again.
+ */
+int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ int rc;
+
+ if (!xprt_connected(rqst->rq_xprt))
+ goto drop_connection;
+
+ rc = rpcrdma_bc_marshal_reply(rqst);
+ if (rc < 0)
+ goto failed_marshal;
+
+ if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+ goto drop_connection;
return 0;
+
+failed_marshal:
+ if (rc != -ENOTCONN)
+ return rc;
+drop_connection:
+ xprt_disconnect_done(rqst->rq_xprt);
+ return -ENOTCONN;
}
/**
@@ -262,11 +287,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
dprintk("RPC: %s: freeing rqst %p (req %p)\n",
__func__, rqst, rpcr_to_rdmar(rqst));
- smp_mb__before_atomic();
- WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
- clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
- smp_mb__after_atomic();
-
spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock_bh(&xprt->bc_pa_lock);
@@ -274,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
/**
* rpcrdma_bc_receive_call - Handle a backward direction call
- * @xprt: transport receiving the call
+ * @r_xprt: transport receiving the call
* @rep: receive buffer containing the call
*
* Operational assumptions:
@@ -313,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst, rq_bc_pa_list);
list_del(&rqst->rq_bc_pa_list);
spin_unlock(&xprt->bc_pa_lock);
- dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
/* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0;
@@ -321,7 +340,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
rqst->rq_xid = *p;
rqst->rq_private_buf.len = size;
- set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
buf = &rqst->rq_rcv_buf;
memset(buf, 0, sizeof(*buf));
@@ -335,12 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
* the Upper Layer is done decoding it.
*/
req = rpcr_to_rdmar(rqst);
- dprintk("RPC: %s: attaching rep %p to req %p\n",
- __func__, rep, req);
req->rl_reply = rep;
-
- /* Defeat the retransmit detection logic in send_request */
- req->rl_connect_cookie = 0;
+ trace_xprtrdma_cb_call(rqst);
/* Queue rqst for ULP's callback service */
bc_serv = xprt->bc_serv;
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 29fc84c7ff98..d5f95bb39300 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/
@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia)
}
static int
-fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
+fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
static struct ib_fmr_attr fmr_attr = {
.max_pages = RPCRDMA_MAX_FMR_SGES,
@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
.page_shift = PAGE_SHIFT
};
- mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
+ mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
sizeof(u64), GFP_KERNEL);
- if (!mw->fmr.fm_physaddrs)
+ if (!mr->fmr.fm_physaddrs)
goto out_free;
- mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
- sizeof(*mw->mw_sg), GFP_KERNEL);
- if (!mw->mw_sg)
+ mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
+ sizeof(*mr->mr_sg), GFP_KERNEL);
+ if (!mr->mr_sg)
goto out_free;
- sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES);
+ sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
- mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
+ mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
&fmr_attr);
- if (IS_ERR(mw->fmr.fm_mr))
+ if (IS_ERR(mr->fmr.fm_mr))
goto out_fmr_err;
return 0;
out_fmr_err:
dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
- PTR_ERR(mw->fmr.fm_mr));
+ PTR_ERR(mr->fmr.fm_mr));
out_free:
- kfree(mw->mw_sg);
- kfree(mw->fmr.fm_physaddrs);
+ kfree(mr->mr_sg);
+ kfree(mr->fmr.fm_physaddrs);
return -ENOMEM;
}
static int
-__fmr_unmap(struct rpcrdma_mw *mw)
+__fmr_unmap(struct rpcrdma_mr *mr)
{
LIST_HEAD(l);
int rc;
- list_add(&mw->fmr.fm_mr->list, &l);
+ list_add(&mr->fmr.fm_mr->list, &l);
rc = ib_unmap_fmr(&l);
- list_del(&mw->fmr.fm_mr->list);
+ list_del(&mr->fmr.fm_mr->list);
return rc;
}
static void
-fmr_op_release_mr(struct rpcrdma_mw *r)
+fmr_op_release_mr(struct rpcrdma_mr *mr)
{
LIST_HEAD(unmap_list);
int rc;
/* Ensure MW is not on any rl_registered list */
- if (!list_empty(&r->mw_list))
- list_del(&r->mw_list);
+ if (!list_empty(&mr->mr_list))
+ list_del(&mr->mr_list);
- kfree(r->fmr.fm_physaddrs);
- kfree(r->mw_sg);
+ kfree(mr->fmr.fm_physaddrs);
+ kfree(mr->mr_sg);
/* In case this one was left mapped, try to unmap it
* to prevent dealloc_fmr from failing with EBUSY
*/
- rc = __fmr_unmap(r);
+ rc = __fmr_unmap(mr);
if (rc)
pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
- r, rc);
+ mr, rc);
- rc = ib_dealloc_fmr(r->fmr.fm_mr);
+ rc = ib_dealloc_fmr(mr->fmr.fm_mr);
if (rc)
pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
- r, rc);
+ mr, rc);
- kfree(r);
+ kfree(mr);
}
/* Reset of a single FMR.
*/
static void
-fmr_op_recover_mr(struct rpcrdma_mw *mw)
+fmr_op_recover_mr(struct rpcrdma_mr *mr)
{
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
int rc;
/* ORDER: invalidate first */
- rc = __fmr_unmap(mw);
-
- /* ORDER: then DMA unmap */
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ rc = __fmr_unmap(mr);
if (rc)
goto out_release;
- rpcrdma_put_mw(r_xprt, mw);
+ /* ORDER: then DMA unmap */
+ rpcrdma_mr_unmap_and_put(mr);
+
r_xprt->rx_stats.mrs_recovered++;
return;
out_release:
- pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw);
+ pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
- spin_lock(&r_xprt->rx_buf.rb_mwlock);
- list_del(&mw->mw_all);
- spin_unlock(&r_xprt->rx_buf.rb_mwlock);
+ trace_xprtrdma_dma_unmap(mr);
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+
+ spin_lock(&r_xprt->rx_buf.rb_mrlock);
+ list_del(&mr->mr_all);
+ spin_unlock(&r_xprt->rx_buf.rb_mrlock);
- fmr_op_release_mr(mw);
+ fmr_op_release_mr(mr);
}
static int
@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
*/
static struct rpcrdma_mr_seg *
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, struct rpcrdma_mw **out)
+ int nsegs, bool writing, struct rpcrdma_mr **out)
{
struct rpcrdma_mr_seg *seg1 = seg;
int len, pageoff, i, rc;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
u64 *dma_pages;
- mw = rpcrdma_get_mw(r_xprt);
- if (!mw)
+ mr = rpcrdma_mr_get(r_xprt);
+ if (!mr)
return ERR_PTR(-ENOBUFS);
pageoff = offset_in_page(seg1->mr_offset);
@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
nsegs = RPCRDMA_MAX_FMR_SGES;
for (i = 0; i < nsegs;) {
if (seg->mr_page)
- sg_set_page(&mw->mw_sg[i],
+ sg_set_page(&mr->mr_sg[i],
seg->mr_page,
seg->mr_len,
offset_in_page(seg->mr_offset));
else
- sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
+ sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len);
len += seg->mr_len;
++seg;
@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- mw->mw_dir = rpcrdma_data_dir(writing);
+ mr->mr_dir = rpcrdma_data_dir(writing);
- mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, i, mw->mw_dir);
- if (!mw->mw_nents)
+ mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, i, mr->mr_dir);
+ if (!mr->mr_nents)
goto out_dmamap_err;
- for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
- dma_pages[i] = sg_dma_address(&mw->mw_sg[i]);
- rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents,
+ for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
+ dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
+ rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
dma_pages[0]);
if (rc)
goto out_maperr;
- mw->mw_handle = mw->fmr.fm_mr->rkey;
- mw->mw_length = len;
- mw->mw_offset = dma_pages[0] + pageoff;
+ mr->mr_handle = mr->fmr.fm_mr->rkey;
+ mr->mr_length = len;
+ mr->mr_offset = dma_pages[0] + pageoff;
- *out = mw;
+ *out = mr;
return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
- mw->mw_sg, i);
- rpcrdma_put_mw(r_xprt, mw);
+ mr->mr_sg, i);
+ rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
len, (unsigned long long)dma_pages[0],
- pageoff, mw->mw_nents, rc);
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ pageoff, mr->mr_nents, rc);
+ rpcrdma_mr_unmap_and_put(mr);
return ERR_PTR(-EIO);
}
@@ -256,13 +256,13 @@ out_maperr:
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
- * Caller ensures that @mws is not empty before the call. This
+ * Caller ensures that @mrs is not empty before the call. This
* function empties the list.
*/
static void
-fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
LIST_HEAD(unmap_list);
int rc;
@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* ib_unmap_fmr() is slow, so use a single call instead
* of one call per mapped FMR.
*/
- list_for_each_entry(mw, mws, mw_list) {
+ list_for_each_entry(mr, mrs, mr_list) {
dprintk("RPC: %s: unmapping fmr %p\n",
- __func__, &mw->fmr);
- list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
+ __func__, &mr->fmr);
+ trace_xprtrdma_localinv(mr);
+ list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
}
r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list);
@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
/* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list.
*/
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
- dprintk("RPC: %s: DMA unmapping fmr %p\n",
- __func__, &mw->fmr);
- list_del(&mw->fmr.fm_mr->list);
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
+ list_del(&mr->fmr.fm_mr->list);
+ rpcrdma_mr_unmap_and_put(mr);
}
return;
@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
out_reset:
pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
- list_del(&mw->fmr.fm_mr->list);
- fmr_op_recover_mr(mw);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
+ list_del(&mr->fmr.fm_mr->list);
+ fmr_op_recover_mr(mr);
}
}
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 773e66e10a15..90f688f19783 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/
/* Lightweight memory registration using Fast Registration Work
- * Requests (FRWR). Also referred to sometimes as FRMR mode.
+ * Requests (FRWR).
*
* FRWR features ordered asynchronous registration and deregistration
* of arbitrarily sized memory regions. This is the fastest and safest
@@ -15,9 +15,9 @@
/* Normal operation
*
* A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
- * Work Request (frmr_op_map). When the RDMA operation is finished, this
+ * Work Request (frwr_op_map). When the RDMA operation is finished, this
* Memory Region is invalidated using a LOCAL_INV Work Request
- * (frmr_op_unmap).
+ * (frwr_op_unmap_sync).
*
* Typically these Work Requests are not signaled, and neither are RDMA
* SEND Work Requests (with the exception of signaling occasionally to
@@ -26,7 +26,7 @@
*
* As an optimization, frwr_op_unmap marks MRs INVALID before the
* LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
- * rb_mws immediately so that no work (like managing a linked list
+ * rb_mrs immediately so that no work (like managing a linked list
* under a spinlock) is needed in the completion upcall.
*
* But this means that frwr_op_map() can occasionally encounter an MR
@@ -60,7 +60,7 @@
* When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
* with ib_dereg_mr and then are re-initialized. Because MR recovery
* allocates fresh resources, it is deferred to a workqueue, and the
- * recovered MRs are placed back on the rb_mws list when recovery is
+ * recovered MRs are placed back on the rb_mrs list when recovery is
* complete. frwr_op_map allocates another MR for the current RPC while
* the broken MR is reset.
*
@@ -96,26 +96,26 @@ out_not_supported:
}
static int
-frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
- unsigned int depth = ia->ri_max_frmr_depth;
- struct rpcrdma_frmr *f = &r->frmr;
+ unsigned int depth = ia->ri_max_frwr_depth;
+ struct rpcrdma_frwr *frwr = &mr->frwr;
int rc;
- f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
- if (IS_ERR(f->fr_mr))
+ frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
+ if (IS_ERR(frwr->fr_mr))
goto out_mr_err;
- r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
- if (!r->mw_sg)
+ mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
+ if (!mr->mr_sg)
goto out_list_err;
- sg_init_table(r->mw_sg, depth);
- init_completion(&f->fr_linv_done);
+ sg_init_table(mr->mr_sg, depth);
+ init_completion(&frwr->fr_linv_done);
return 0;
out_mr_err:
- rc = PTR_ERR(f->fr_mr);
+ rc = PTR_ERR(frwr->fr_mr);
dprintk("RPC: %s: ib_alloc_mr status %i\n",
__func__, rc);
return rc;
@@ -124,83 +124,85 @@ out_list_err:
rc = -ENOMEM;
dprintk("RPC: %s: sg allocation failure\n",
__func__);
- ib_dereg_mr(f->fr_mr);
+ ib_dereg_mr(frwr->fr_mr);
return rc;
}
static void
-frwr_op_release_mr(struct rpcrdma_mw *r)
+frwr_op_release_mr(struct rpcrdma_mr *mr)
{
int rc;
- /* Ensure MW is not on any rl_registered list */
- if (!list_empty(&r->mw_list))
- list_del(&r->mw_list);
+ /* Ensure MR is not on any rl_registered list */
+ if (!list_empty(&mr->mr_list))
+ list_del(&mr->mr_list);
- rc = ib_dereg_mr(r->frmr.fr_mr);
+ rc = ib_dereg_mr(mr->frwr.fr_mr);
if (rc)
pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
- r, rc);
- kfree(r->mw_sg);
- kfree(r);
+ mr, rc);
+ kfree(mr->mr_sg);
+ kfree(mr);
}
static int
-__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
- struct rpcrdma_frmr *f = &r->frmr;
+ struct rpcrdma_frwr *frwr = &mr->frwr;
int rc;
- rc = ib_dereg_mr(f->fr_mr);
+ rc = ib_dereg_mr(frwr->fr_mr);
if (rc) {
pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
- rc, r);
+ rc, mr);
return rc;
}
- f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
- ia->ri_max_frmr_depth);
- if (IS_ERR(f->fr_mr)) {
+ frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
+ ia->ri_max_frwr_depth);
+ if (IS_ERR(frwr->fr_mr)) {
pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
- PTR_ERR(f->fr_mr), r);
- return PTR_ERR(f->fr_mr);
+ PTR_ERR(frwr->fr_mr), mr);
+ return PTR_ERR(frwr->fr_mr);
}
- dprintk("RPC: %s: recovered FRMR %p\n", __func__, f);
- f->fr_state = FRMR_IS_INVALID;
+ dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
+ frwr->fr_state = FRWR_IS_INVALID;
return 0;
}
-/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
+/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
*/
static void
-frwr_op_recover_mr(struct rpcrdma_mw *mw)
+frwr_op_recover_mr(struct rpcrdma_mr *mr)
{
- enum rpcrdma_frmr_state state = mw->frmr.fr_state;
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ enum rpcrdma_frwr_state state = mr->frwr.fr_state;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int rc;
- rc = __frwr_reset_mr(ia, mw);
- if (state != FRMR_FLUSHED_LI)
+ rc = __frwr_mr_reset(ia, mr);
+ if (state != FRWR_FLUSHED_LI) {
+ trace_xprtrdma_dma_unmap(mr);
ib_dma_unmap_sg(ia->ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ }
if (rc)
goto out_release;
- rpcrdma_put_mw(r_xprt, mw);
+ rpcrdma_mr_put(mr);
r_xprt->rx_stats.mrs_recovered++;
return;
out_release:
- pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw);
+ pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
- spin_lock(&r_xprt->rx_buf.rb_mwlock);
- list_del(&mw->mw_all);
- spin_unlock(&r_xprt->rx_buf.rb_mwlock);
+ spin_lock(&r_xprt->rx_buf.rb_mrlock);
+ list_del(&mr->mr_all);
+ spin_unlock(&r_xprt->rx_buf.rb_mrlock);
- frwr_op_release_mr(mw);
+ frwr_op_release_mr(mr);
}
static int
@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
- ia->ri_max_frmr_depth =
+ ia->ri_max_frwr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
attrs->max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n",
- __func__, ia->ri_max_frmr_depth);
-
- /* Add room for frmr register and invalidate WRs.
- * 1. FRMR reg WR for head
- * 2. FRMR invalidate WR for head
- * 3. N FRMR reg WRs for pagelist
- * 4. N FRMR invalidate WRs for pagelist
- * 5. FRMR reg WR for tail
- * 6. FRMR invalidate WR for tail
+ __func__, ia->ri_max_frwr_depth);
+
+ /* Add room for frwr register and invalidate WRs.
+ * 1. FRWR reg WR for head
+ * 2. FRWR invalidate WR for head
+ * 3. N FRWR reg WRs for pagelist
+ * 4. N FRWR invalidate WRs for pagelist
+ * 5. FRWR reg WR for tail
+ * 6. FRWR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
depth = 7;
- /* Calculate N if the device max FRMR depth is smaller than
+ /* Calculate N if the device max FRWR depth is smaller than
* RPCRDMA_MAX_DATA_SEGS.
*/
- if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
- delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
+ if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
+ delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
do {
- depth += 2; /* FRMR reg + invalidate */
- delta -= ia->ri_max_frmr_depth;
+ depth += 2; /* FRWR reg + invalidate */
+ delta -= ia->ri_max_frwr_depth;
} while (delta > 0);
}
@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
}
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
- ia->ri_max_frmr_depth);
+ ia->ri_max_frwr_depth);
return 0;
}
@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth);
+ RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
}
static void
@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
static void
frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
- struct ib_cqe *cqe;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr =
+ container_of(cqe, struct rpcrdma_frwr, fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
- cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- frmr->fr_state = FRMR_FLUSHED_FR;
+ frwr->fr_state = FRWR_FLUSHED_FR;
__frwr_sendcompletion_flush(wc, "fastreg");
}
+ trace_xprtrdma_wc_fastreg(wc, frwr);
}
/**
@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
static void
frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
- struct ib_cqe *cqe;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
+ fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
- cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- frmr->fr_state = FRMR_FLUSHED_LI;
+ frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
+ trace_xprtrdma_wc_li(wc, frwr);
}
/**
@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
static void
frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
- struct ib_cqe *cqe;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
+ fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
if (wc->status != IB_WC_SUCCESS) {
- frmr->fr_state = FRMR_FLUSHED_LI;
+ frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
- complete(&frmr->fr_linv_done);
+ complete(&frwr->fr_linv_done);
+ trace_xprtrdma_wc_li_wake(wc, frwr);
}
/* Post a REG_MR Work Request to register a memory region
@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
*/
static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, struct rpcrdma_mw **out)
+ int nsegs, bool writing, struct rpcrdma_mr **out)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
- struct rpcrdma_mw *mw;
- struct rpcrdma_frmr *frmr;
- struct ib_mr *mr;
+ struct rpcrdma_frwr *frwr;
+ struct rpcrdma_mr *mr;
+ struct ib_mr *ibmr;
struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr;
int rc, i, n;
u8 key;
- mw = NULL;
+ mr = NULL;
do {
- if (mw)
- rpcrdma_defer_mr_recovery(mw);
- mw = rpcrdma_get_mw(r_xprt);
- if (!mw)
+ if (mr)
+ rpcrdma_mr_defer_recovery(mr);
+ mr = rpcrdma_mr_get(r_xprt);
+ if (!mr)
return ERR_PTR(-ENOBUFS);
- } while (mw->frmr.fr_state != FRMR_IS_INVALID);
- frmr = &mw->frmr;
- frmr->fr_state = FRMR_IS_VALID;
- mr = frmr->fr_mr;
- reg_wr = &frmr->fr_regwr;
-
- if (nsegs > ia->ri_max_frmr_depth)
- nsegs = ia->ri_max_frmr_depth;
+ } while (mr->frwr.fr_state != FRWR_IS_INVALID);
+ frwr = &mr->frwr;
+ frwr->fr_state = FRWR_IS_VALID;
+
+ if (nsegs > ia->ri_max_frwr_depth)
+ nsegs = ia->ri_max_frwr_depth;
for (i = 0; i < nsegs;) {
if (seg->mr_page)
- sg_set_page(&mw->mw_sg[i],
+ sg_set_page(&mr->mr_sg[i],
seg->mr_page,
seg->mr_len,
offset_in_page(seg->mr_offset));
else
- sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
+ sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len);
++seg;
@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- mw->mw_dir = rpcrdma_data_dir(writing);
+ mr->mr_dir = rpcrdma_data_dir(writing);
- mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir);
- if (!mw->mw_nents)
+ mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
+ if (!mr->mr_nents)
goto out_dmamap_err;
- n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
- if (unlikely(n != mw->mw_nents))
+ ibmr = frwr->fr_mr;
+ n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
+ if (unlikely(n != mr->mr_nents))
goto out_mapmr_err;
- dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n",
- __func__, frmr, mw->mw_nents, mr->length);
-
- key = (u8)(mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(mr, ++key);
+ key = (u8)(ibmr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(ibmr, ++key);
+ reg_wr = &frwr->fr_regwr;
reg_wr->wr.next = NULL;
reg_wr->wr.opcode = IB_WR_REG_MR;
- frmr->fr_cqe.done = frwr_wc_fastreg;
- reg_wr->wr.wr_cqe = &frmr->fr_cqe;
+ frwr->fr_cqe.done = frwr_wc_fastreg;
+ reg_wr->wr.wr_cqe = &frwr->fr_cqe;
reg_wr->wr.num_sge = 0;
reg_wr->wr.send_flags = 0;
- reg_wr->mr = mr;
- reg_wr->key = mr->rkey;
+ reg_wr->mr = ibmr;
+ reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
if (rc)
goto out_senderr;
- mw->mw_handle = mr->rkey;
- mw->mw_length = mr->length;
- mw->mw_offset = mr->iova;
+ mr->mr_handle = ibmr->rkey;
+ mr->mr_length = ibmr->length;
+ mr->mr_offset = ibmr->iova;
- *out = mw;
+ *out = mr;
return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
- mw->mw_sg, i);
- frmr->fr_state = FRMR_IS_INVALID;
- rpcrdma_put_mw(r_xprt, mw);
+ mr->mr_sg, i);
+ frwr->fr_state = FRWR_IS_INVALID;
+ rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
- frmr->fr_mr, n, mw->mw_nents);
- rpcrdma_defer_mr_recovery(mw);
+ frwr->fr_mr, n, mr->mr_nents);
+ rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-EIO);
out_senderr:
- pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
- rpcrdma_defer_mr_recovery(mw);
+ pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
+ rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-ENOTCONN);
}
+/* Handle a remotely invalidated mr on the @mrs list
+ */
+static void
+frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
+{
+ struct rpcrdma_mr *mr;
+
+ list_for_each_entry(mr, mrs, mr_list)
+ if (mr->mr_handle == rep->rr_inv_rkey) {
+ list_del(&mr->mr_list);
+ trace_xprtrdma_remoteinv(mr);
+ mr->frwr.fr_state = FRWR_IS_INVALID;
+ rpcrdma_mr_unmap_and_put(mr);
+ break; /* only one invalidated MR per RPC */
+ }
+}
+
/* Invalidate all memory regions that were registered for "req".
*
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
- * Caller ensures that @mws is not empty before the call. This
+ * Caller ensures that @mrs is not empty before the call. This
* function empties the list.
*/
static void
-frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
struct ib_send_wr *first, **prev, *last, *bad_wr;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct rpcrdma_frmr *f;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_frwr *frwr;
+ struct rpcrdma_mr *mr;
int count, rc;
/* ORDER: Invalidate all of the MRs first
@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- f = NULL;
+ frwr = NULL;
count = 0;
prev = &first;
- list_for_each_entry(mw, mws, mw_list) {
- mw->frmr.fr_state = FRMR_IS_INVALID;
+ list_for_each_entry(mr, mrs, mr_list) {
+ mr->frwr.fr_state = FRWR_IS_INVALID;
- if (mw->mw_flags & RPCRDMA_MW_F_RI)
- continue;
+ frwr = &mr->frwr;
+ trace_xprtrdma_localinv(mr);
- f = &mw->frmr;
- dprintk("RPC: %s: invalidating frmr %p\n",
- __func__, f);
-
- f->fr_cqe.done = frwr_wc_localinv;
- last = &f->fr_invwr;
+ frwr->fr_cqe.done = frwr_wc_localinv;
+ last = &frwr->fr_invwr;
memset(last, 0, sizeof(*last));
- last->wr_cqe = &f->fr_cqe;
+ last->wr_cqe = &frwr->fr_cqe;
last->opcode = IB_WR_LOCAL_INV;
- last->ex.invalidate_rkey = mw->mw_handle;
+ last->ex.invalidate_rkey = mr->mr_handle;
count++;
*prev = last;
prev = &last->next;
}
- if (!f)
+ if (!frwr)
goto unmap;
/* Strong send queue ordering guarantees that when the
@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* are complete.
*/
last->send_flags = IB_SEND_SIGNALED;
- f->fr_cqe.done = frwr_wc_localinv_wake;
- reinit_completion(&f->fr_linv_done);
+ frwr->fr_cqe.done = frwr_wc_localinv_wake;
+ reinit_completion(&frwr->fr_linv_done);
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
bad_wr = NULL;
rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
if (bad_wr != first)
- wait_for_completion(&f->fr_linv_done);
+ wait_for_completion(&frwr->fr_linv_done);
if (rc)
goto reset_mrs;
/* ORDER: Now DMA unmap all of the MRs, and return
- * them to the free MW list.
+ * them to the free MR list.
*/
unmap:
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
- dprintk("RPC: %s: DMA unmapping frmr %p\n",
- __func__, &mw->frmr);
- ib_dma_unmap_sg(ia->ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
+ rpcrdma_mr_unmap_and_put(mr);
}
return;
reset_mrs:
- pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
+ pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
/* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted.
*/
while (bad_wr) {
- f = container_of(bad_wr, struct rpcrdma_frmr,
- fr_invwr);
- mw = container_of(f, struct rpcrdma_mw, frmr);
+ frwr = container_of(bad_wr, struct rpcrdma_frwr,
+ fr_invwr);
+ mr = container_of(frwr, struct rpcrdma_mr, frwr);
- __frwr_reset_mr(ia, mw);
+ __frwr_mr_reset(ia, mr);
bad_wr = bad_wr->next;
}
@@ -553,6 +561,7 @@ reset_mrs:
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
+ .ro_reminv = frwr_op_reminv,
.ro_unmap_sync = frwr_op_unmap_sync,
.ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open,
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 560712bd9fa2..a762d192372b 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -1,18 +1,20 @@
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle. All rights reserved.
*/
/* rpcrdma.ko module initialization
*/
+#include <linux/types.h>
+#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h>
-#include "xprt_rdma.h"
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
+#include <asm/swab.h>
+
+#define CREATE_TRACE_POINTS
+#include "xprt_rdma.h"
MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
MODULE_DESCRIPTION("RPC/RDMA Transport");
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ed34dc0f144c..162e5dd82466 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr)
}
static void
-xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
+xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
{
- *iptr++ = cpu_to_be32(mw->mw_handle);
- *iptr++ = cpu_to_be32(mw->mw_length);
- xdr_encode_hyper(iptr, mw->mw_offset);
+ *iptr++ = cpu_to_be32(mr->mr_handle);
+ *iptr++ = cpu_to_be32(mr->mr_length);
+ xdr_encode_hyper(iptr, mr->mr_offset);
}
static int
-encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
+encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
__be32 *p;
@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
if (unlikely(!p))
return -EMSGSIZE;
- xdr_encode_rdma_segment(p, mw);
+ xdr_encode_rdma_segment(p, mr);
return 0;
}
static int
-encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
+encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
u32 position)
{
__be32 *p;
@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
*p++ = xdr_one; /* Item present */
*p++ = cpu_to_be32(position);
- xdr_encode_rdma_segment(p, mw);
+ xdr_encode_rdma_segment(p, mr);
return 0;
}
@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
unsigned int pos;
int nsegs;
@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- false, &mw);
+ false, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_read_segment(xdr, mw, pos) < 0)
+ if (encode_read_segment(xdr, mr, pos) < 0)
return -EMSGSIZE;
- dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n",
- rqst->rq_task->tk_pid, __func__, pos,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+ trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs);
r_xprt->rx_stats.read_chunk_count++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
return 0;
@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int nsegs, nchunks;
__be32 *segcount;
@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0;
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mw);
+ true, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_rdma_segment(xdr, mw) < 0)
+ if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE;
- dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n",
- rqst->rq_task->tk_pid, __func__,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+ trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs);
r_xprt->rx_stats.write_chunk_count++;
- r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+ r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
/* Update count of segments in this Write chunk */
@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int nsegs, nchunks;
__be32 *segcount;
@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0;
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mw);
+ true, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_rdma_segment(xdr, mw) < 0)
+ if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE;
- dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n",
- rqst->rq_task->tk_pid, __func__,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+ trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs);
r_xprt->rx_stats.reply_chunk_count++;
- r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+ r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
/* Update count of segments in the Reply chunk */
@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
struct ib_sge *sge;
unsigned int count;
- dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
- __func__, sc->sc_unmap_count, sc);
-
/* The first two SGEs contain the transport header and
* the inline buffer. These are always left mapped so
* they can be cheaply re-used.
@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
__be32 *p;
int ret;
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
- if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
- return rpcrdma_bc_marshal_reply(rqst);
-#endif
-
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf,
req->rl_rdmabuf->rg_base);
@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
rtype = rpcrdma_areadch;
}
+ /* If this is a retransmit, discard previously registered
+ * chunks. Very likely the connection has been replaced,
+ * so these registrations are invalid and unusable.
+ */
+ while (unlikely(!list_empty(&req->rl_registered))) {
+ struct rpcrdma_mr *mr;
+
+ mr = rpcrdma_mr_pop(&req->rl_registered);
+ rpcrdma_mr_defer_recovery(mr);
+ }
+
/* This implementation supports the following combinations
* of chunk lists in one RPC-over-RDMA Call message:
*
@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
if (ret)
goto out_err;
- dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n",
- rqst->rq_task->tk_pid, __func__,
- transfertypes[rtype], transfertypes[wtype],
- xdr_stream_pos(xdr));
+ trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype);
ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
&rqst->rq_snd_buf, rtype);
@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
curlen = rqst->rq_rcv_buf.head[0].iov_len;
if (curlen > copy_len)
curlen = copy_len;
- dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n",
- __func__, srcp, copy_len, curlen);
+ trace_xprtrdma_fixup(rqst, copy_len, curlen);
srcp += curlen;
copy_len -= curlen;
@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
if (curlen > pagelist_len)
curlen = pagelist_len;
- dprintk("RPC: %s: page %d"
- " srcp 0x%p len %d curlen %d\n",
- __func__, i, srcp, copy_len, curlen);
+ trace_xprtrdma_fixup_pg(rqst, i, srcp,
+ copy_len, curlen);
destp = kmap_atomic(ppages[i]);
memcpy(destp + page_base, srcp, curlen);
flush_dcache_page(ppages[i]);
@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
return fixup_copy_count;
}
-/* Caller must guarantee @rep remains stable during this call.
- */
-static void
-rpcrdma_mark_remote_invalidation(struct list_head *mws,
- struct rpcrdma_rep *rep)
-{
- struct rpcrdma_mw *mw;
-
- if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
- return;
-
- list_for_each_entry(mw, mws, mw_list)
- if (mw->mw_handle == rep->rr_inv_rkey) {
- mw->mw_flags = RPCRDMA_MW_F_RI;
- break; /* only one invalidated MR per RPC */
- }
-}
-
/* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes
* the RPC/RDMA header small and fixed in size, so it is
@@ -1058,26 +1026,19 @@ out_short:
static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
{
+ u32 handle;
+ u64 offset;
__be32 *p;
p = xdr_inline_decode(xdr, 4 * sizeof(*p));
if (unlikely(!p))
return -EIO;
- ifdebug(FACILITY) {
- u64 offset;
- u32 handle;
-
- handle = be32_to_cpup(p++);
- *length = be32_to_cpup(p++);
- xdr_decode_hyper(p, &offset);
- dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n",
- __func__, *length, (unsigned long long)offset,
- handle);
- } else {
- *length = be32_to_cpup(p + 1);
- }
+ handle = be32_to_cpup(p++);
+ *length = be32_to_cpup(p++);
+ xdr_decode_hyper(p, &offset);
+ trace_xprtrdma_decode_seg(handle, *length, offset);
return 0;
}
@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
*length += seglength;
}
- dprintk("RPC: %s: segcount=%u, %u bytes\n",
- __func__, be32_to_cpup(p), *length);
return 0;
}
@@ -1296,8 +1255,7 @@ out:
* being marshaled.
*/
out_badheader:
- dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
- rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
+ trace_xprtrdma_reply_hdr(rep);
r_xprt->rx_stats.bad_reply_count++;
status = -EIO;
goto out;
@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work)
struct rpcrdma_rep *rep =
container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
+ struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
- rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
- rpcrdma_release_rqst(rep->rr_rxprt, req);
+ trace_xprtrdma_defer_cmp(rep);
+ if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
+ r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
+ rpcrdma_release_rqst(r_xprt, req);
rpcrdma_complete_rqst(rep);
}
@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
u32 credits;
__be32 *p;
- dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
-
if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus;
@@ -1405,14 +1364,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
rep->rr_rqst = rqst;
clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
- dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
- __func__, rep, req, be32_to_cpu(rep->rr_xid));
+ trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
- if (list_empty(&req->rl_registered) &&
- !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
- rpcrdma_complete_rqst(rep);
- else
- queue_work(rpcrdma_receive_wq, &rep->rr_work);
+ queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
return;
out_badstatus:
@@ -1424,8 +1378,7 @@ out_badstatus:
return;
out_badversion:
- dprintk("RPC: %s: invalid version %d\n",
- __func__, be32_to_cpu(rep->rr_vers));
+ trace_xprtrdma_reply_vers(rep);
goto repost;
/* The RPC transaction has already been terminated, or the header
@@ -1433,12 +1386,11 @@ out_badversion:
*/
out_norqst:
spin_unlock(&xprt->recv_lock);
- dprintk("RPC: %s: no match for incoming xid 0x%08x\n",
- __func__, be32_to_cpu(rep->rr_xid));
+ trace_xprtrdma_reply_rqst(rep);
goto repost;
out_shortreply:
- dprintk("RPC: %s: short/invalid reply\n", __func__);
+ trace_xprtrdma_reply_short(rep);
/* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning.
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 646c24494ea7..4b1ecfe979cf 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -52,6 +52,7 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/addr.h>
+#include <linux/smp.h>
#include "xprt_rdma.h"
@@ -66,8 +67,7 @@
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
-static unsigned int xprt_rdma_inline_write_padding;
-unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
+unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
int xprt_rdma_pad_optimize;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -80,6 +80,7 @@ static unsigned int zero;
static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1;
+static unsigned int dummy;
static struct ctl_table_header *sunrpc_table_header;
@@ -113,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = {
},
{
.procname = "rdma_inline_write_padding",
- .data = &xprt_rdma_inline_write_padding,
+ .data = &dummy,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -258,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
xprt_clear_connected(xprt);
- dprintk("RPC: %s: %sconnect\n", __func__,
- r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
if (rc)
xprt_wake_pending_tasks(xprt, rc);
- dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt);
}
@@ -274,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
rx_xprt);
- pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt);
+ trace_xprtrdma_inject_dsc(r_xprt);
rdma_disconnect(r_xprt->rx_ia.ri_id);
}
@@ -294,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- dprintk("RPC: %s: called\n", __func__);
+ trace_xprtrdma_destroy(r_xprt);
cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
@@ -305,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
rpcrdma_ia_close(&r_xprt->rx_ia);
xprt_rdma_free_addresses(xprt);
-
xprt_free(xprt);
- dprintk("RPC: %s: returning\n", __func__);
-
module_put(THIS_MODULE);
}
@@ -360,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args)
/*
* Set up RDMA-specific connect data.
*/
-
- sap = (struct sockaddr *)&cdata.addr;
- memcpy(sap, args->dstaddr, args->addrlen);
+ sap = args->dstaddr;
/* Ensure xprt->addr holds valid server TCP (not RDMA)
* address, for any side protocols which peek at it */
@@ -372,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args)
if (rpc_get_port(sap))
xprt_set_bound(xprt);
+ xprt_rdma_format_addresses(xprt, sap);
cdata.max_requests = xprt->max_reqs;
@@ -386,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args)
if (cdata.inline_rsize > cdata.rsize)
cdata.inline_rsize = cdata.rsize;
- cdata.padding = xprt_rdma_inline_write_padding;
-
/*
* Create new transport instance, which includes initialized
* o ia
@@ -397,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args)
new_xprt = rpcx_to_rdmax(xprt);
- rc = rpcrdma_ia_open(new_xprt, sap);
+ rc = rpcrdma_ia_open(new_xprt);
if (rc)
goto out1;
@@ -406,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args)
*/
new_xprt->rx_data = cdata;
new_ep = &new_xprt->rx_ep;
- new_ep->rep_remote_addr = cdata.addr;
rc = rpcrdma_ep_create(&new_xprt->rx_ep,
&new_xprt->rx_ia, &new_xprt->rx_data);
if (rc)
goto out2;
- /*
- * Allocate pre-registered send and receive buffers for headers and
- * any inline data. Also specify any padding which will be provided
- * from a preregistered zero buffer.
- */
rc = rpcrdma_buffer_create(new_xprt);
if (rc)
goto out3;
- /*
- * Register a callback for connection events. This is necessary because
- * connection loss notification is async. We also catch connection loss
- * when reaping receives.
- */
INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
xprt_rdma_connect_worker);
- xprt_rdma_format_addresses(xprt, sap);
xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
if (xprt->max_payload == 0)
goto out4;
@@ -444,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args)
dprintk("RPC: %s: %s:%s\n", __func__,
xprt->address_strings[RPC_DISPLAY_ADDR],
xprt->address_strings[RPC_DISPLAY_PORT]);
+ trace_xprtrdma_create(new_xprt);
return xprt;
out4:
- xprt_rdma_free_addresses(xprt);
- rc = -EINVAL;
+ rpcrdma_buffer_destroy(&new_xprt->rx_buf);
+ rc = -ENODEV;
out3:
rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
+ trace_xprtrdma_destroy(new_xprt);
+ xprt_rdma_free_addresses(xprt);
xprt_free(xprt);
return ERR_PTR(rc);
}
@@ -487,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt)
rpcrdma_ep_disconnect(ep, ia);
}
+/**
+ * xprt_rdma_set_port - update server port with rpcbind result
+ * @xprt: controlling RPC transport
+ * @port: new port value
+ *
+ * Transport connect status is unchanged.
+ */
static void
xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
{
- struct sockaddr_in *sap;
+ struct sockaddr *sap = (struct sockaddr *)&xprt->addr;
+ char buf[8];
- sap = (struct sockaddr_in *)&xprt->addr;
- sap->sin_port = htons(port);
- sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
- sap->sin_port = htons(port);
- dprintk("RPC: %s: %u\n", __func__, port);
+ dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n",
+ __func__, xprt,
+ xprt->address_strings[RPC_DISPLAY_ADDR],
+ xprt->address_strings[RPC_DISPLAY_PORT],
+ port);
+
+ rpc_set_port(sap, port);
+
+ kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
+ snprintf(buf, sizeof(buf), "%u", port);
+ xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
+
+ kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
+ snprintf(buf, sizeof(buf), "%4hx", port);
+ xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
}
/**
@@ -515,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
static void
xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{
- dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
-
xprt_force_disconnect(xprt);
}
@@ -639,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task)
req = rpcrdma_buffer_get(&r_xprt->rx_buf);
if (req == NULL)
- return -ENOMEM;
+ goto out_get;
flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task))
@@ -652,18 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task)
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail;
- dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n",
- task->tk_pid, __func__, rqst->rq_callsize,
- rqst->rq_rcvsize, req);
-
+ req->rl_cpu = smp_processor_id();
req->rl_connect_cookie = 0; /* our reserved value */
rpcrdma_set_xprtdata(rqst, req);
rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
+ trace_xprtrdma_allocate(task, req);
return 0;
out_fail:
rpcrdma_buffer_put(req);
+out_get:
+ trace_xprtrdma_allocate(task, NULL);
return -ENOMEM;
}
@@ -680,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
- return;
-
- dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
-
if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
rpcrdma_release_rqst(r_xprt, req);
+ trace_xprtrdma_rpc_done(task, req);
rpcrdma_buffer_put(req);
}
@@ -696,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task)
*
* Caller holds the transport's write lock.
*
- * Return values:
- * 0: The request has been sent
- * ENOTCONN: Caller needs to invoke connect logic then call again
- * ENOBUFS: Call again later to send the request
- * EIO: A permanent error occurred. The request was not sent,
- * and don't try it again
- *
- * send_request invokes the meat of RPC RDMA. It must do the following:
- *
- * 1. Marshal the RPC request into an RPC RDMA request, which means
- * putting a header in front of data, and creating IOVs for RDMA
- * from those in the request.
- * 2. In marshaling, detect opportunities for RDMA, and use them.
- * 3. Post a recv message to set up asynch completion, then send
- * the request (rpcrdma_ep_post).
- * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
+ * Returns:
+ * %0 if the RPC message has been sent
+ * %-ENOTCONN if the caller should reconnect and call again
+ * %-ENOBUFS if the caller should call again later
+ * %-EIO if a permanent error occurred and the request was not
+ * sent. Do not try to send this message again.
*/
static int
xprt_rdma_send_request(struct rpc_task *task)
@@ -722,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc = 0;
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (unlikely(!rqst->rq_buffer))
+ return xprt_rdma_bc_send_reply(rqst);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
if (!xprt_connected(xprt))
goto drop_connection;
- /* On retransmit, remove any previously registered chunks */
- if (unlikely(!list_empty(&req->rl_registered)))
- r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
- &req->rl_registered);
-
rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0)
goto failed_marshal;
@@ -742,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task)
goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie;
- set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
+ __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
@@ -902,8 +887,7 @@ int xprt_rdma_init(void)
"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
xprt_rdma_slot_table_entries,
xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
- dprintk("\tPadding %d\n\tMemreg %d\n",
- xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
+ dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
if (!sunrpc_table_header)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 710b3f77db82..f4eb63e8e689 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -71,8 +71,8 @@
/*
* internal functions
*/
-static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
-static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
+static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
+static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
@@ -83,7 +83,7 @@ rpcrdma_alloc_wq(void)
struct workqueue_struct *recv_wq;
recv_wq = alloc_workqueue("xprtrdma_receive",
- WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI,
+ WQ_MEM_RECLAIM | WQ_HIGHPRI,
0);
if (!recv_wq)
return -ENOMEM;
@@ -108,7 +108,10 @@ static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
struct rpcrdma_ep *ep = context;
+ struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
+ rx_ep);
+ trace_xprtrdma_qp_error(r_xprt, event);
pr_err("rpcrdma: %s on device %s ep %p\n",
ib_event_msg(event->event), event->device->name, context);
@@ -133,6 +136,7 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
+ trace_xprtrdma_wc_send(sc, wc);
if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
@@ -155,13 +159,11 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rr_cqe);
/* WARNING: Only wr_id and status are reliable at this point */
+ trace_xprtrdma_wc_receive(rep, wc);
if (wc->status != IB_WC_SUCCESS)
goto out_fail;
/* status == SUCCESS means all fields in wc are trustworthy */
- dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
- __func__, rep, wc->byte_len);
-
rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
rep->rr_wc_flags = wc->wc_flags;
rep->rr_inv_rkey = wc->ex.invalidate_rkey;
@@ -192,7 +194,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
unsigned int rsize, wsize;
/* Default settings for RPC-over-RDMA Version One */
- r_xprt->rx_ia.ri_reminv_expected = false;
r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
@@ -200,7 +201,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
if (pmsg &&
pmsg->cp_magic == rpcrdma_cmp_magic &&
pmsg->cp_version == RPCRDMA_CMP_VERSION) {
- r_xprt->rx_ia.ri_reminv_expected = true;
r_xprt->rx_ia.ri_implicit_roundup = true;
rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
@@ -221,11 +221,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
struct rpcrdma_xprt *xprt = id->context;
struct rpcrdma_ia *ia = &xprt->rx_ia;
struct rpcrdma_ep *ep = &xprt->rx_ep;
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
-#endif
int connstate = 0;
+ trace_xprtrdma_conn_upcall(xprt, event);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
case RDMA_CM_EVENT_ROUTE_RESOLVED:
@@ -234,21 +232,17 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
break;
case RDMA_CM_EVENT_ADDR_ERROR:
ia->ri_async_rc = -EHOSTUNREACH;
- dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
- __func__, ep);
complete(&ia->ri_done);
break;
case RDMA_CM_EVENT_ROUTE_ERROR:
ia->ri_async_rc = -ENETUNREACH;
- dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
- __func__, ep);
complete(&ia->ri_done);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- pr_info("rpcrdma: removing device %s for %pIS:%u\n",
+ pr_info("rpcrdma: removing device %s for %s:%s\n",
ia->ri_device->name,
- sap, rpc_get_port(sap));
+ rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt));
#endif
set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
ep->rep_connected = -ENODEV;
@@ -271,8 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
connstate = -ENETDOWN;
goto connected;
case RDMA_CM_EVENT_REJECTED:
- dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n",
- sap, rpc_get_port(sap),
+ dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
+ rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
rdma_reject_msg(id, event->status));
connstate = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
@@ -287,8 +281,9 @@ connected:
wake_up_all(&ep->rep_connect_wait);
/*FALLTHROUGH*/
default:
- dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n",
- __func__, sap, rpc_get_port(sap),
+ dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n",
+ __func__,
+ rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
ia->ri_device->name, ia->ri_ops->ro_displayname,
ep, rdma_event_msg(event->event));
break;
@@ -298,13 +293,14 @@ connected:
}
static struct rdma_cm_id *
-rpcrdma_create_id(struct rpcrdma_xprt *xprt,
- struct rpcrdma_ia *ia, struct sockaddr *addr)
+rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
{
unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
struct rdma_cm_id *id;
int rc;
+ trace_xprtrdma_conn_start(xprt);
+
init_completion(&ia->ri_done);
init_completion(&ia->ri_remove_done);
@@ -318,7 +314,9 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
}
ia->ri_async_rc = -ETIMEDOUT;
- rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
+ rc = rdma_resolve_addr(id, NULL,
+ (struct sockaddr *)&xprt->rx_xprt.addr,
+ RDMA_RESOLVE_TIMEOUT);
if (rc) {
dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
__func__, rc);
@@ -326,8 +324,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
}
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0) {
- dprintk("RPC: %s: wait() exited: %i\n",
- __func__, rc);
+ trace_xprtrdma_conn_tout(xprt);
goto out;
}
@@ -344,8 +341,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
}
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0) {
- dprintk("RPC: %s: wait() exited: %i\n",
- __func__, rc);
+ trace_xprtrdma_conn_tout(xprt);
goto out;
}
rc = ia->ri_async_rc;
@@ -365,19 +361,18 @@ out:
/**
* rpcrdma_ia_open - Open and initialize an Interface Adapter.
- * @xprt: controlling transport
- * @addr: IP address of remote peer
+ * @xprt: transport with IA to (re)initialize
*
* Returns 0 on success, negative errno if an appropriate
* Interface Adapter could not be found and opened.
*/
int
-rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
+rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
{
struct rpcrdma_ia *ia = &xprt->rx_ia;
int rc;
- ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
+ ia->ri_id = rpcrdma_create_id(xprt, ia);
if (IS_ERR(ia->ri_id)) {
rc = PTR_ERR(ia->ri_id);
goto out_err;
@@ -392,7 +387,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
}
switch (xprt_rdma_memreg_strategy) {
- case RPCRDMA_FRMR:
+ case RPCRDMA_FRWR:
if (frwr_is_supported(ia)) {
ia->ri_ops = &rpcrdma_frwr_memreg_ops;
break;
@@ -462,10 +457,12 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
}
- rpcrdma_destroy_mrs(buf);
+ rpcrdma_mrs_destroy(buf);
/* Allow waiters to continue */
complete(&ia->ri_remove_done);
+
+ trace_xprtrdma_remove(r_xprt);
}
/**
@@ -476,7 +473,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
- dprintk("RPC: %s: entering\n", __func__);
if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
if (ia->ri_id->qp)
rdma_destroy_qp(ia->ri_id);
@@ -630,9 +626,6 @@ out1:
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
- dprintk("RPC: %s: entering, connected is %d\n",
- __func__, ep->rep_connected);
-
cancel_delayed_work_sync(&ep->rep_connect_worker);
if (ia->ri_id->qp) {
@@ -653,13 +646,12 @@ static int
rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
- struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
int rc, err;
- pr_info("%s: r_xprt = %p\n", __func__, r_xprt);
+ trace_xprtrdma_reinsert(r_xprt);
rc = -EHOSTUNREACH;
- if (rpcrdma_ia_open(r_xprt, sap))
+ if (rpcrdma_ia_open(r_xprt))
goto out1;
rc = -ENOMEM;
@@ -676,7 +668,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
goto out3;
}
- rpcrdma_create_mrs(r_xprt);
+ rpcrdma_mrs_create(r_xprt);
return 0;
out3:
@@ -691,16 +683,15 @@ static int
rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
struct rpcrdma_ia *ia)
{
- struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
struct rdma_cm_id *id, *old;
int err, rc;
- dprintk("RPC: %s: reconnecting...\n", __func__);
+ trace_xprtrdma_reconnect(r_xprt);
rpcrdma_ep_disconnect(ep, ia);
rc = -EHOSTUNREACH;
- id = rpcrdma_create_id(r_xprt, ia, sap);
+ id = rpcrdma_create_id(r_xprt, ia);
if (IS_ERR(id))
goto out;
@@ -817,16 +808,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
int rc;
rc = rdma_disconnect(ia->ri_id);
- if (!rc) {
+ if (!rc)
/* returns without wait if not connected */
wait_event_interruptible(ep->rep_connect_wait,
ep->rep_connected != 1);
- dprintk("RPC: %s: after wait, %sconnected\n", __func__,
- (ep->rep_connected == 1) ? "still " : "dis");
- } else {
- dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
+ else
ep->rep_connected = rc;
- }
+ trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt,
+ rx_ep), rc);
ib_drain_qp(ia->ri_id->qp);
}
@@ -998,15 +987,15 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
{
struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
rb_recovery_worker.work);
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
spin_lock(&buf->rb_recovery_lock);
while (!list_empty(&buf->rb_stale_mrs)) {
- mw = rpcrdma_pop_mw(&buf->rb_stale_mrs);
+ mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
spin_unlock(&buf->rb_recovery_lock);
- dprintk("RPC: %s: recovering MR %p\n", __func__, mw);
- mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw);
+ trace_xprtrdma_recover_mr(mr);
+ mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
spin_lock(&buf->rb_recovery_lock);
}
@@ -1014,20 +1003,20 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
}
void
-rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
+rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
{
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
spin_lock(&buf->rb_recovery_lock);
- rpcrdma_push_mw(mw, &buf->rb_stale_mrs);
+ rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
spin_unlock(&buf->rb_recovery_lock);
schedule_delayed_work(&buf->rb_recovery_worker, 0);
}
static void
-rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
+rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -1036,32 +1025,32 @@ rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
LIST_HEAD(all);
for (count = 0; count < 32; count++) {
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int rc;
- mw = kzalloc(sizeof(*mw), GFP_KERNEL);
- if (!mw)
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
break;
- rc = ia->ri_ops->ro_init_mr(ia, mw);
+ rc = ia->ri_ops->ro_init_mr(ia, mr);
if (rc) {
- kfree(mw);
+ kfree(mr);
break;
}
- mw->mw_xprt = r_xprt;
+ mr->mr_xprt = r_xprt;
- list_add(&mw->mw_list, &free);
- list_add(&mw->mw_all, &all);
+ list_add(&mr->mr_list, &free);
+ list_add(&mr->mr_all, &all);
}
- spin_lock(&buf->rb_mwlock);
- list_splice(&free, &buf->rb_mws);
+ spin_lock(&buf->rb_mrlock);
+ list_splice(&free, &buf->rb_mrs);
list_splice(&all, &buf->rb_all);
r_xprt->rx_stats.mrs_allocated += count;
- spin_unlock(&buf->rb_mwlock);
+ spin_unlock(&buf->rb_mrlock);
- dprintk("RPC: %s: created %u MRs\n", __func__, count);
+ trace_xprtrdma_createmrs(r_xprt, count);
}
static void
@@ -1072,7 +1061,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
rx_buf);
- rpcrdma_create_mrs(r_xprt);
+ rpcrdma_mrs_create(r_xprt);
}
struct rpcrdma_req *
@@ -1093,10 +1082,17 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
return req;
}
-struct rpcrdma_rep *
+/**
+ * rpcrdma_create_rep - Allocate an rpcrdma_rep object
+ * @r_xprt: controlling transport
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+int
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
int rc;
@@ -1121,12 +1117,18 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
- return rep;
+
+ spin_lock(&buf->rb_lock);
+ list_add(&rep->rr_list, &buf->rb_recv_bufs);
+ spin_unlock(&buf->rb_lock);
+ return 0;
out_free:
kfree(rep);
out:
- return ERR_PTR(rc);
+ dprintk("RPC: %s: reply buffer %d alloc failed\n",
+ __func__, rc);
+ return rc;
}
int
@@ -1137,10 +1139,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
buf->rb_max_requests = r_xprt->rx_data.max_requests;
buf->rb_bc_srv_max_requests = 0;
- spin_lock_init(&buf->rb_mwlock);
+ spin_lock_init(&buf->rb_mrlock);
spin_lock_init(&buf->rb_lock);
spin_lock_init(&buf->rb_recovery_lock);
- INIT_LIST_HEAD(&buf->rb_mws);
+ INIT_LIST_HEAD(&buf->rb_mrs);
INIT_LIST_HEAD(&buf->rb_all);
INIT_LIST_HEAD(&buf->rb_stale_mrs);
INIT_DELAYED_WORK(&buf->rb_refresh_worker,
@@ -1148,7 +1150,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
INIT_DELAYED_WORK(&buf->rb_recovery_worker,
rpcrdma_mr_recovery_worker);
- rpcrdma_create_mrs(r_xprt);
+ rpcrdma_mrs_create(r_xprt);
INIT_LIST_HEAD(&buf->rb_send_bufs);
INIT_LIST_HEAD(&buf->rb_allreqs);
@@ -1167,17 +1169,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
}
INIT_LIST_HEAD(&buf->rb_recv_bufs);
- for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) {
- struct rpcrdma_rep *rep;
-
- rep = rpcrdma_create_rep(r_xprt);
- if (IS_ERR(rep)) {
- dprintk("RPC: %s: reply buffer %d alloc failed\n",
- __func__, i);
- rc = PTR_ERR(rep);
+ for (i = 0; i <= buf->rb_max_requests; i++) {
+ rc = rpcrdma_create_rep(r_xprt);
+ if (rc)
goto out;
- }
- list_add(&rep->rr_list, &buf->rb_recv_bufs);
}
rc = rpcrdma_sendctxs_create(r_xprt);
@@ -1229,26 +1224,26 @@ rpcrdma_destroy_req(struct rpcrdma_req *req)
}
static void
-rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf)
+rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
{
struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
rx_buf);
struct rpcrdma_ia *ia = rdmab_to_ia(buf);
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
unsigned int count;
count = 0;
- spin_lock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
while (!list_empty(&buf->rb_all)) {
- mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
- list_del(&mw->mw_all);
+ mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
+ list_del(&mr->mr_all);
- spin_unlock(&buf->rb_mwlock);
- ia->ri_ops->ro_release_mr(mw);
+ spin_unlock(&buf->rb_mrlock);
+ ia->ri_ops->ro_release_mr(mr);
count++;
- spin_lock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
}
- spin_unlock(&buf->rb_mwlock);
+ spin_unlock(&buf->rb_mrlock);
r_xprt->rx_stats.mrs_allocated = 0;
dprintk("RPC: %s: released %u MRs\n", __func__, count);
@@ -1285,27 +1280,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
spin_unlock(&buf->rb_reqslock);
buf->rb_recv_count = 0;
- rpcrdma_destroy_mrs(buf);
+ rpcrdma_mrs_destroy(buf);
}
-struct rpcrdma_mw *
-rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
+/**
+ * rpcrdma_mr_get - Allocate an rpcrdma_mr object
+ * @r_xprt: controlling transport
+ *
+ * Returns an initialized rpcrdma_mr or NULL if no free
+ * rpcrdma_mr objects are available.
+ */
+struct rpcrdma_mr *
+rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- struct rpcrdma_mw *mw = NULL;
+ struct rpcrdma_mr *mr = NULL;
- spin_lock(&buf->rb_mwlock);
- if (!list_empty(&buf->rb_mws))
- mw = rpcrdma_pop_mw(&buf->rb_mws);
- spin_unlock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
+ if (!list_empty(&buf->rb_mrs))
+ mr = rpcrdma_mr_pop(&buf->rb_mrs);
+ spin_unlock(&buf->rb_mrlock);
- if (!mw)
- goto out_nomws;
- mw->mw_flags = 0;
- return mw;
+ if (!mr)
+ goto out_nomrs;
+ return mr;
-out_nomws:
- dprintk("RPC: %s: no MWs available\n", __func__);
+out_nomrs:
+ trace_xprtrdma_nomrs(r_xprt);
if (r_xprt->rx_ep.rep_connected != -ENODEV)
schedule_delayed_work(&buf->rb_refresh_worker, 0);
@@ -1315,14 +1316,39 @@ out_nomws:
return NULL;
}
+static void
+__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
+{
+ spin_lock(&buf->rb_mrlock);
+ rpcrdma_mr_push(mr, &buf->rb_mrs);
+ spin_unlock(&buf->rb_mrlock);
+}
+
+/**
+ * rpcrdma_mr_put - Release an rpcrdma_mr object
+ * @mr: object to release
+ *
+ */
void
-rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
+rpcrdma_mr_put(struct rpcrdma_mr *mr)
{
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);
+}
+
+/**
+ * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
+ * @mr: object to release
+ *
+ */
+void
+rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
+{
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- spin_lock(&buf->rb_mwlock);
- rpcrdma_push_mw(mw, &buf->rb_mws);
- spin_unlock(&buf->rb_mwlock);
+ trace_xprtrdma_dma_unmap(mr);
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
}
static struct rpcrdma_rep *
@@ -1359,11 +1385,11 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
req = rpcrdma_buffer_get_req_locked(buffers);
req->rl_reply = rpcrdma_buffer_get_rep(buffers);
spin_unlock(&buffers->rb_lock);
+
return req;
out_reqbuf:
spin_unlock(&buffers->rb_lock);
- pr_warn("RPC: %s: out of request buffers\n", __func__);
return NULL;
}
@@ -1519,9 +1545,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
req->rl_reply = NULL;
}
- dprintk("RPC: %s: posting %d s/g entries\n",
- __func__, send_wr->num_sge);
-
if (!ep->rep_send_count ||
test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
send_wr->send_flags |= IB_SEND_SIGNALED;
@@ -1530,14 +1553,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
send_wr->send_flags &= ~IB_SEND_SIGNALED;
--ep->rep_send_count;
}
+
rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
+ trace_xprtrdma_post_send(req, rc);
if (rc)
- goto out_postsend_err;
+ return -ENOTCONN;
return 0;
-
-out_postsend_err:
- pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
- return -ENOTCONN;
}
int
@@ -1550,23 +1571,20 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
goto out_map;
rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
+ trace_xprtrdma_post_recv(rep, rc);
if (rc)
- goto out_postrecv;
+ return -ENOTCONN;
return 0;
out_map:
pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
return -EIO;
-
-out_postrecv:
- pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
- return -ENOTCONN;
}
/**
* rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
* @r_xprt: transport associated with these backchannel resources
- * @min_reqs: minimum number of incoming requests expected
+ * @count: minimum number of incoming requests expected
*
* Returns zero if all requested buffers were posted, or a negative errno.
*/
@@ -1594,7 +1612,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
out_reqbuf:
spin_unlock(&buffers->rb_lock);
- pr_warn("%s: no extra receive buffers\n", __func__);
+ trace_xprtrdma_noreps(r_xprt);
return -ENOMEM;
out_rc:
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 51686d9eac5f..69883a960a3f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -73,11 +73,10 @@ struct rpcrdma_ia {
struct completion ri_remove_done;
int ri_async_rc;
unsigned int ri_max_segs;
- unsigned int ri_max_frmr_depth;
+ unsigned int ri_max_frwr_depth;
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
unsigned int ri_max_send_sges;
- bool ri_reminv_expected;
bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype;
unsigned long ri_flags;
@@ -101,7 +100,6 @@ struct rpcrdma_ep {
wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
- struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker;
};
@@ -232,29 +230,29 @@ enum {
};
/*
- * struct rpcrdma_mw - external memory region metadata
+ * struct rpcrdma_mr - external memory region metadata
*
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
*
- * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During
+ * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
* call_allocate, rpcrdma_buffer_get() assigns one to each segment in
* an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
* track of registration metadata while each RPC is pending.
* rpcrdma_deregister_external() uses this metadata to unmap and
* release these resources when an RPC is complete.
*/
-enum rpcrdma_frmr_state {
- FRMR_IS_INVALID, /* ready to be used */
- FRMR_IS_VALID, /* in use */
- FRMR_FLUSHED_FR, /* flushed FASTREG WR */
- FRMR_FLUSHED_LI, /* flushed LOCALINV WR */
+enum rpcrdma_frwr_state {
+ FRWR_IS_INVALID, /* ready to be used */
+ FRWR_IS_VALID, /* in use */
+ FRWR_FLUSHED_FR, /* flushed FASTREG WR */
+ FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
};
-struct rpcrdma_frmr {
+struct rpcrdma_frwr {
struct ib_mr *fr_mr;
struct ib_cqe fr_cqe;
- enum rpcrdma_frmr_state fr_state;
+ enum rpcrdma_frwr_state fr_state;
struct completion fr_linv_done;
union {
struct ib_reg_wr fr_regwr;
@@ -267,26 +265,20 @@ struct rpcrdma_fmr {
u64 *fm_physaddrs;
};
-struct rpcrdma_mw {
- struct list_head mw_list;
- struct scatterlist *mw_sg;
- int mw_nents;
- enum dma_data_direction mw_dir;
- unsigned long mw_flags;
+struct rpcrdma_mr {
+ struct list_head mr_list;
+ struct scatterlist *mr_sg;
+ int mr_nents;
+ enum dma_data_direction mr_dir;
union {
struct rpcrdma_fmr fmr;
- struct rpcrdma_frmr frmr;
+ struct rpcrdma_frwr frwr;
};
- struct rpcrdma_xprt *mw_xprt;
- u32 mw_handle;
- u32 mw_length;
- u64 mw_offset;
- struct list_head mw_all;
-};
-
-/* mw_flags */
-enum {
- RPCRDMA_MW_F_RI = 1,
+ struct rpcrdma_xprt *mr_xprt;
+ u32 mr_handle;
+ u32 mr_length;
+ u64 mr_offset;
+ struct list_head mr_all;
};
/*
@@ -342,6 +334,7 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
+ int rl_cpu;
unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
@@ -361,8 +354,7 @@ struct rpcrdma_req {
/* rl_flags */
enum {
- RPCRDMA_REQ_F_BACKCHANNEL = 0,
- RPCRDMA_REQ_F_PENDING,
+ RPCRDMA_REQ_F_PENDING = 0,
RPCRDMA_REQ_F_TX_RESOURCES,
};
@@ -373,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
}
static inline struct rpcrdma_req *
-rpcr_to_rdmar(struct rpc_rqst *rqst)
+rpcr_to_rdmar(const struct rpc_rqst *rqst)
{
return rqst->rq_xprtdata;
}
static inline void
-rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list)
+rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
{
- list_add_tail(&mw->mw_list, list);
+ list_add_tail(&mr->mr_list, list);
}
-static inline struct rpcrdma_mw *
-rpcrdma_pop_mw(struct list_head *list)
+static inline struct rpcrdma_mr *
+rpcrdma_mr_pop(struct list_head *list)
{
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
- mw = list_first_entry(list, struct rpcrdma_mw, mw_list);
- list_del(&mw->mw_list);
- return mw;
+ mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
+ list_del(&mr->mr_list);
+ return mr;
}
/*
@@ -401,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list)
* One of these is associated with a transport instance
*/
struct rpcrdma_buffer {
- spinlock_t rb_mwlock; /* protect rb_mws list */
- struct list_head rb_mws;
+ spinlock_t rb_mrlock; /* protect rb_mrs list */
+ struct list_head rb_mrs;
struct list_head rb_all;
unsigned long rb_sc_head;
@@ -437,13 +429,11 @@ struct rpcrdma_buffer {
* This data should be set with mount options
*/
struct rpcrdma_create_data_internal {
- struct sockaddr_storage addr; /* RDMA server address */
unsigned int max_requests; /* max requests (slots) in flight */
unsigned int rsize; /* mount rsize - max read hdr+data */
unsigned int wsize; /* mount wsize - max write hdr+data */
unsigned int inline_rsize; /* max non-rdma read data payload */
unsigned int inline_wsize; /* max non-rdma write data payload */
- unsigned int padding; /* non-rdma write header padding */
};
/*
@@ -483,17 +473,19 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mr_seg *
(*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool,
- struct rpcrdma_mw **);
+ struct rpcrdma_mr **);
+ void (*ro_reminv)(struct rpcrdma_rep *rep,
+ struct list_head *mrs);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *);
- void (*ro_recover_mr)(struct rpcrdma_mw *);
+ void (*ro_recover_mr)(struct rpcrdma_mr *mr);
int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *,
struct rpcrdma_create_data_internal *);
size_t (*ro_maxpages)(struct rpcrdma_xprt *);
int (*ro_init_mr)(struct rpcrdma_ia *,
- struct rpcrdma_mw *);
- void (*ro_release_mr)(struct rpcrdma_mw *);
+ struct rpcrdma_mr *);
+ void (*ro_release_mr)(struct rpcrdma_mr *mr);
const char *ro_displayname;
const int ro_send_w_inv_ok;
};
@@ -524,6 +516,18 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
+static inline const char *
+rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
+{
+ return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR];
+}
+
+static inline const char *
+rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt)
+{
+ return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT];
+}
+
/* Setting this to 0 ensures interoperability with early servers.
* Setting this to 1 enhances certain unaligned read/write performance.
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
@@ -537,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/
-int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
+int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *);
@@ -563,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
* Buffer calls - xprtrdma/verbs.c
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
-struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *);
+int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
-struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
-void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
+struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
+void rpcrdma_mr_put(struct rpcrdma_mr *mr);
+void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
+void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
+
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
-void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
-
struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
gfp_t);
bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
@@ -662,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
-int rpcrdma_bc_marshal_reply(struct rpc_rqst *);
+int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -670,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
extern struct xprt_class xprt_rdma_bc;
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
+
+#include <trace/events/rpcrdma.h>
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9cc850c2719e..18803021f242 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -52,6 +52,8 @@
#include "sunrpc.h"
+#define RPC_TCP_READ_CHUNK_SZ (3*512*1024)
+
static void xs_close(struct rpc_xprt *xprt);
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock);
@@ -1003,6 +1005,7 @@ static void xs_local_data_receive(struct sock_xprt *transport)
struct sock *sk;
int err;
+restart:
mutex_lock(&transport->recv_mutex);
sk = transport->inet;
if (sk == NULL)
@@ -1016,6 +1019,11 @@ static void xs_local_data_receive(struct sock_xprt *transport)
}
if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
break;
+ if (need_resched()) {
+ mutex_unlock(&transport->recv_mutex);
+ cond_resched();
+ goto restart;
+ }
}
out:
mutex_unlock(&transport->recv_mutex);
@@ -1094,6 +1102,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
struct sock *sk;
int err;
+restart:
mutex_lock(&transport->recv_mutex);
sk = transport->inet;
if (sk == NULL)
@@ -1107,6 +1116,11 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
}
if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
break;
+ if (need_resched()) {
+ mutex_unlock(&transport->recv_mutex);
+ cond_resched();
+ goto restart;
+ }
}
out:
mutex_unlock(&transport->recv_mutex);
@@ -1479,6 +1493,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
.offset = offset,
.count = len,
};
+ size_t ret;
dprintk("RPC: xs_tcp_data_recv started\n");
do {
@@ -1507,9 +1522,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
/* Skip over any trailing bytes on short reads */
xs_tcp_read_discard(transport, &desc);
} while (desc.count);
+ ret = len - desc.count;
+ if (ret < rd_desc->count)
+ rd_desc->count -= ret;
+ else
+ rd_desc->count = 0;
trace_xs_tcp_data_recv(transport);
dprintk("RPC: xs_tcp_data_recv done\n");
- return len - desc.count;
+ return ret;
}
static void xs_tcp_data_receive(struct sock_xprt *transport)
@@ -1517,30 +1537,34 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
struct rpc_xprt *xprt = &transport->xprt;
struct sock *sk;
read_descriptor_t rd_desc = {
- .count = 2*1024*1024,
.arg.data = xprt,
};
unsigned long total = 0;
- int loop;
int read = 0;
+restart:
mutex_lock(&transport->recv_mutex);
sk = transport->inet;
if (sk == NULL)
goto out;
/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
- for (loop = 0; loop < 64; loop++) {
+ for (;;) {
+ rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
lock_sock(sk);
read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
- if (read <= 0) {
+ if (rd_desc.count != 0 || read < 0) {
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
release_sock(sk);
break;
}
release_sock(sk);
total += read;
- rd_desc.count = 65536;
+ if (need_resched()) {
+ mutex_unlock(&transport->recv_mutex);
+ cond_resched();
+ goto restart;
+ }
}
if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
queue_work(xprtiod_workqueue, &transport->recv_worker);
@@ -2440,7 +2464,9 @@ static void xs_tcp_setup_socket(struct work_struct *work)
*/
case -ECONNREFUSED:
case -ECONNRESET:
+ case -ENETDOWN:
case -ENETUNREACH:
+ case -EHOSTUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
/*
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 329325bd553e..37892b3909af 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -1,7 +1,7 @@
/*
* net/tipc/bcast.c: TIPC broadcast code
*
- * Copyright (c) 2004-2006, 2014-2016, Ericsson AB
+ * Copyright (c) 2004-2006, 2014-2017, Ericsson AB
* Copyright (c) 2004, Intel Corporation.
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
@@ -42,8 +42,8 @@
#include "link.h"
#include "name_table.h"
-#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */
-#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
+#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */
+#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
const char tipc_bclink_name[] = "broadcast-link";
@@ -74,6 +74,10 @@ static struct tipc_bc_base *tipc_bc_base(struct net *net)
return tipc_net(net)->bcbase;
}
+/* tipc_bcast_get_mtu(): -get the MTU currently used by broadcast link
+ * Note: the MTU is decremented to give room for a tunnel header, in
+ * case the message needs to be sent as replicast
+ */
int tipc_bcast_get_mtu(struct net *net)
{
return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE;
@@ -515,7 +519,7 @@ int tipc_bcast_init(struct net *net)
spin_lock_init(&tipc_net(net)->bclock);
if (!tipc_link_bc_create(net, 0, 0,
- U16_MAX,
+ FB_MTU,
BCLINK_WIN_DEFAULT,
0,
&bb->inputq,
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 47ec121574ce..c8001471da6c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -324,6 +324,7 @@ restart:
if (res) {
pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
name, -res);
+ kfree(b);
return -EINVAL;
}
@@ -347,8 +348,10 @@ restart:
if (skb)
tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
- if (tipc_mon_create(net, bearer_id))
+ if (tipc_mon_create(net, bearer_id)) {
+ bearer_disable(net, b);
return -ENOMEM;
+ }
pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
name,
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 964342689f2c..20b21af2ff14 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -49,7 +49,6 @@
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/atomic.h>
-#include <asm/hardirq.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/list.h>
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 12777cac638a..122162a31816 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -49,8 +49,6 @@
#define ADV_ACTIVE (ADV_UNIT * 12)
enum mbr_state {
- MBR_QUARANTINED,
- MBR_DISCOVERED,
MBR_JOINING,
MBR_PUBLISHED,
MBR_JOINED,
@@ -64,8 +62,7 @@ enum mbr_state {
struct tipc_member {
struct rb_node tree_node;
struct list_head list;
- struct list_head congested;
- struct sk_buff *event_msg;
+ struct list_head small_win;
struct sk_buff_head deferredq;
struct tipc_group *group;
u32 node;
@@ -77,21 +74,18 @@ struct tipc_member {
u16 bc_rcv_nxt;
u16 bc_syncpt;
u16 bc_acked;
- bool usr_pending;
};
struct tipc_group {
struct rb_root members;
- struct list_head congested;
+ struct list_head small_win;
struct list_head pending;
struct list_head active;
- struct list_head reclaiming;
struct tipc_nlist dests;
struct net *net;
int subid;
u32 type;
u32 instance;
- u32 domain;
u32 scope;
u32 portid;
u16 member_cnt;
@@ -99,6 +93,7 @@ struct tipc_group {
u16 max_active;
u16 bc_snd_nxt;
u16 bc_ackers;
+ bool *open;
bool loopback;
bool events;
};
@@ -106,10 +101,21 @@ struct tipc_group {
static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
int mtyp, struct sk_buff_head *xmitq);
+static void tipc_group_open(struct tipc_member *m, bool *wakeup)
+{
+ *wakeup = false;
+ if (list_empty(&m->small_win))
+ return;
+ list_del_init(&m->small_win);
+ *m->group->open = true;
+ *wakeup = true;
+}
+
static void tipc_group_decr_active(struct tipc_group *grp,
struct tipc_member *m)
{
- if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
+ if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING ||
+ m->state == MBR_REMITTED)
grp->active_cnt--;
}
@@ -136,14 +142,14 @@ u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
return grp->bc_snd_nxt;
}
-static bool tipc_group_is_enabled(struct tipc_member *m)
+static bool tipc_group_is_receiver(struct tipc_member *m)
{
- return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING;
+ return m && m->state != MBR_JOINING && m->state != MBR_LEAVING;
}
-static bool tipc_group_is_receiver(struct tipc_member *m)
+static bool tipc_group_is_sender(struct tipc_member *m)
{
- return m && m->state >= MBR_JOINED;
+ return m && m->state != MBR_JOINING && m->state != MBR_PUBLISHED;
}
u32 tipc_group_exclude(struct tipc_group *grp)
@@ -159,8 +165,11 @@ int tipc_group_size(struct tipc_group *grp)
}
struct tipc_group *tipc_group_create(struct net *net, u32 portid,
- struct tipc_group_req *mreq)
+ struct tipc_group_req *mreq,
+ bool *group_is_open)
{
+ u32 filter = TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS;
+ bool global = mreq->scope != TIPC_NODE_SCOPE;
struct tipc_group *grp;
u32 type = mreq->type;
@@ -168,25 +177,41 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid,
if (!grp)
return NULL;
tipc_nlist_init(&grp->dests, tipc_own_addr(net));
- INIT_LIST_HEAD(&grp->congested);
+ INIT_LIST_HEAD(&grp->small_win);
INIT_LIST_HEAD(&grp->active);
INIT_LIST_HEAD(&grp->pending);
- INIT_LIST_HEAD(&grp->reclaiming);
grp->members = RB_ROOT;
grp->net = net;
grp->portid = portid;
- grp->domain = addr_domain(net, mreq->scope);
grp->type = type;
grp->instance = mreq->instance;
grp->scope = mreq->scope;
grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
- if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid))
+ grp->open = group_is_open;
+ filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE;
+ if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0,
+ filter, &grp->subid))
return grp;
kfree(grp);
return NULL;
}
+void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf)
+{
+ struct rb_root *tree = &grp->members;
+ struct tipc_member *m, *tmp;
+ struct sk_buff_head xmitq;
+
+ skb_queue_head_init(&xmitq);
+ rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
+ tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq);
+ tipc_group_update_member(m, 0);
+ }
+ tipc_node_distr_xmit(net, &xmitq);
+ *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
+}
+
void tipc_group_delete(struct net *net, struct tipc_group *grp)
{
struct rb_root *tree = &grp->members;
@@ -232,7 +257,7 @@ static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp,
struct tipc_member *m;
m = tipc_group_find_member(grp, node, port);
- if (m && tipc_group_is_enabled(m))
+ if (m && tipc_group_is_receiver(m))
return m;
return NULL;
}
@@ -277,7 +302,7 @@ static void tipc_group_add_to_tree(struct tipc_group *grp,
static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
u32 node, u32 port,
- int state)
+ u32 instance, int state)
{
struct tipc_member *m;
@@ -285,11 +310,12 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
if (!m)
return NULL;
INIT_LIST_HEAD(&m->list);
- INIT_LIST_HEAD(&m->congested);
+ INIT_LIST_HEAD(&m->small_win);
__skb_queue_head_init(&m->deferredq);
m->group = grp;
m->node = node;
m->port = port;
+ m->instance = instance;
m->bc_acked = grp->bc_snd_nxt - 1;
grp->member_cnt++;
tipc_group_add_to_tree(grp, m);
@@ -298,9 +324,10 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
return m;
}
-void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port)
+void tipc_group_add_member(struct tipc_group *grp, u32 node,
+ u32 port, u32 instance)
{
- tipc_group_create_member(grp, node, port, MBR_DISCOVERED);
+ tipc_group_create_member(grp, node, port, instance, MBR_PUBLISHED);
}
static void tipc_group_delete_member(struct tipc_group *grp,
@@ -314,7 +341,7 @@ static void tipc_group_delete_member(struct tipc_group *grp,
grp->bc_ackers--;
list_del_init(&m->list);
- list_del_init(&m->congested);
+ list_del_init(&m->small_win);
tipc_group_decr_active(grp, m);
/* If last member on a node, remove node from dest list */
@@ -343,7 +370,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
struct tipc_group *grp = m->group;
struct tipc_member *_m, *tmp;
- if (!tipc_group_is_enabled(m))
+ if (!tipc_group_is_receiver(m))
return;
m->window -= len;
@@ -351,17 +378,14 @@ void tipc_group_update_member(struct tipc_member *m, int len)
if (m->window >= ADV_IDLE)
return;
- if (!list_empty(&m->congested))
- return;
+ list_del_init(&m->small_win);
- /* Sort member into congested members' list */
- list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
- if (m->window > _m->window)
- continue;
- list_add_tail(&m->congested, &_m->congested);
- return;
+ /* Sort member into small_window members' list */
+ list_for_each_entry_safe(_m, tmp, &grp->small_win, small_win) {
+ if (_m->window > m->window)
+ break;
}
- list_add_tail(&m->congested, &grp->congested);
+ list_add_tail(&m->small_win, &_m->small_win);
}
void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
@@ -369,18 +393,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
u16 prev = grp->bc_snd_nxt - 1;
struct tipc_member *m;
struct rb_node *n;
+ u16 ackers = 0;
for (n = rb_first(&grp->members); n; n = rb_next(n)) {
m = container_of(n, struct tipc_member, tree_node);
- if (tipc_group_is_enabled(m)) {
+ if (tipc_group_is_receiver(m)) {
tipc_group_update_member(m, len);
m->bc_acked = prev;
+ ackers++;
}
}
/* Mark number of acknowledges to expect, if any */
if (ack)
- grp->bc_ackers = grp->member_cnt;
+ grp->bc_ackers = ackers;
grp->bc_snd_nxt++;
}
@@ -392,20 +418,20 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
int adv, state;
m = tipc_group_find_dest(grp, dnode, dport);
- *mbr = m;
- if (!m)
+ if (!tipc_group_is_receiver(m)) {
+ *mbr = NULL;
return false;
- if (m->usr_pending)
- return true;
+ }
+ *mbr = m;
+
if (m->window >= len)
return false;
- m->usr_pending = true;
+
+ *grp->open = false;
/* If not fully advertised, do it now to prevent mutual blocking */
adv = m->advertised;
state = m->state;
- if (state < MBR_JOINED)
- return true;
if (state == MBR_JOINED && adv == ADV_IDLE)
return true;
if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
@@ -423,13 +449,14 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len)
struct tipc_member *m = NULL;
/* If prev bcast was replicast, reject until all receivers have acked */
- if (grp->bc_ackers)
+ if (grp->bc_ackers) {
+ *grp->open = false;
return true;
-
- if (list_empty(&grp->congested))
+ }
+ if (list_empty(&grp->small_win))
return false;
- m = list_first_entry(&grp->congested, struct tipc_member, congested);
+ m = list_first_entry(&grp->small_win, struct tipc_member, small_win);
if (m->window >= len)
return false;
@@ -484,7 +511,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
goto drop;
m = tipc_group_find_member(grp, node, port);
- if (!tipc_group_is_receiver(m))
+ if (!tipc_group_is_sender(m))
goto drop;
if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
@@ -497,6 +524,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
while ((skb = skb_peek(defq))) {
hdr = buf_msg(skb);
mtyp = msg_type(hdr);
+ blks = msg_blocks(hdr);
deliver = true;
ack = false;
update = false;
@@ -546,7 +574,6 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
if (!update)
continue;
- blks = msg_blocks(hdr);
tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
}
return;
@@ -561,7 +588,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
int max_active = grp->max_active;
int reclaim_limit = max_active * 3 / 4;
int active_cnt = grp->active_cnt;
- struct tipc_member *m, *rm;
+ struct tipc_member *m, *rm, *pm;
m = tipc_group_find_member(grp, node, port);
if (!m)
@@ -571,24 +598,34 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
switch (m->state) {
case MBR_JOINED:
- /* Reclaim advertised space from least active member */
- if (!list_empty(active) && active_cnt >= reclaim_limit) {
+ /* First, decide if member can go active */
+ if (active_cnt <= max_active) {
+ m->state = MBR_ACTIVE;
+ list_add_tail(&m->list, active);
+ grp->active_cnt++;
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ } else {
+ m->state = MBR_PENDING;
+ list_add_tail(&m->list, &grp->pending);
+ }
+
+ if (active_cnt < reclaim_limit)
+ break;
+
+ /* Reclaim from oldest active member, if possible */
+ if (!list_empty(active)) {
rm = list_first_entry(active, struct tipc_member, list);
rm->state = MBR_RECLAIMING;
- list_move_tail(&rm->list, &grp->reclaiming);
+ list_del_init(&rm->list);
tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
- }
- /* If max active, become pending and wait for reclaimed space */
- if (active_cnt >= max_active) {
- m->state = MBR_PENDING;
- list_add_tail(&m->list, &grp->pending);
break;
}
- /* Otherwise become active */
- m->state = MBR_ACTIVE;
- list_add_tail(&m->list, &grp->active);
- grp->active_cnt++;
- /* Fall through */
+ /* Nobody to reclaim from; - revert oldest pending to JOINED */
+ pm = list_first_entry(&grp->pending, struct tipc_member, list);
+ list_del_init(&pm->list);
+ pm->state = MBR_JOINED;
+ tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
+ break;
case MBR_ACTIVE:
if (!list_is_last(&m->list, &grp->active))
list_move_tail(&m->list, &grp->active);
@@ -600,13 +637,23 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
if (m->advertised > ADV_IDLE)
break;
m->state = MBR_JOINED;
+ grp->active_cnt--;
if (m->advertised < ADV_IDLE) {
pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
}
+
+ if (list_empty(&grp->pending))
+ return;
+
+ /* Set oldest pending member to active and advertise */
+ pm = list_first_entry(&grp->pending, struct tipc_member, list);
+ pm->state = MBR_ACTIVE;
+ list_move_tail(&pm->list, &grp->active);
+ grp->active_cnt++;
+ tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
break;
case MBR_RECLAIMING:
- case MBR_DISCOVERED:
case MBR_JOINING:
case MBR_LEAVING:
default:
@@ -614,6 +661,40 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
}
}
+static void tipc_group_create_event(struct tipc_group *grp,
+ struct tipc_member *m,
+ u32 event, u16 seqno,
+ struct sk_buff_head *inputq)
+{ u32 dnode = tipc_own_addr(grp->net);
+ struct tipc_event evt;
+ struct sk_buff *skb;
+ struct tipc_msg *hdr;
+
+ evt.event = event;
+ evt.found_lower = m->instance;
+ evt.found_upper = m->instance;
+ evt.port.ref = m->port;
+ evt.port.node = m->node;
+ evt.s.seq.type = grp->type;
+ evt.s.seq.lower = m->instance;
+ evt.s.seq.upper = m->instance;
+
+ skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_GRP_MEMBER_EVT,
+ GROUP_H_SIZE, sizeof(evt), dnode, m->node,
+ grp->portid, m->port, 0);
+ if (!skb)
+ return;
+
+ hdr = buf_msg(skb);
+ msg_set_nametype(hdr, grp->type);
+ msg_set_grp_evt(hdr, event);
+ msg_set_dest_droppable(hdr, true);
+ msg_set_grp_bc_seqno(hdr, seqno);
+ memcpy(msg_data(hdr), &evt, sizeof(evt));
+ TIPC_SKB_CB(skb)->orig_member = m->instance;
+ __skb_queue_tail(inputq, skb);
+}
+
static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
int mtyp, struct sk_buff_head *xmitq)
{
@@ -648,6 +729,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
} else if (mtyp == GRP_REMIT_MSG) {
msg_set_grp_remitted(hdr, m->window);
}
+ msg_set_dest_droppable(hdr, true);
__skb_queue_tail(xmitq, skb);
}
@@ -658,108 +740,95 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
u32 node = msg_orignode(hdr);
u32 port = msg_origport(hdr);
struct tipc_member *m, *pm;
- struct tipc_msg *ehdr;
u16 remitted, in_flight;
if (!grp)
return;
+ if (grp->scope == TIPC_NODE_SCOPE && node != tipc_own_addr(grp->net))
+ return;
+
m = tipc_group_find_member(grp, node, port);
switch (msg_type(hdr)) {
case GRP_JOIN_MSG:
if (!m)
m = tipc_group_create_member(grp, node, port,
- MBR_QUARANTINED);
+ 0, MBR_JOINING);
if (!m)
return;
m->bc_syncpt = msg_grp_bc_syncpt(hdr);
m->bc_rcv_nxt = m->bc_syncpt;
m->window += msg_adv_win(hdr);
- /* Wait until PUBLISH event is received */
- if (m->state == MBR_DISCOVERED) {
- m->state = MBR_JOINING;
- } else if (m->state == MBR_PUBLISHED) {
- m->state = MBR_JOINED;
- *usr_wakeup = true;
- m->usr_pending = false;
- tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
- ehdr = buf_msg(m->event_msg);
- msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
- __skb_queue_tail(inputq, m->event_msg);
- }
- if (m->window < ADV_IDLE)
- tipc_group_update_member(m, 0);
- else
- list_del_init(&m->congested);
+ /* Wait until PUBLISH event is received if necessary */
+ if (m->state != MBR_PUBLISHED)
+ return;
+
+ /* Member can be taken into service */
+ m->state = MBR_JOINED;
+ tipc_group_open(m, usr_wakeup);
+ tipc_group_update_member(m, 0);
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ tipc_group_create_event(grp, m, TIPC_PUBLISHED,
+ m->bc_syncpt, inputq);
return;
case GRP_LEAVE_MSG:
if (!m)
return;
m->bc_syncpt = msg_grp_bc_syncpt(hdr);
-
- /* Wait until WITHDRAW event is received */
- if (m->state != MBR_LEAVING) {
- tipc_group_decr_active(grp, m);
- m->state = MBR_LEAVING;
- return;
- }
- /* Otherwise deliver already received WITHDRAW event */
- ehdr = buf_msg(m->event_msg);
- msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
- __skb_queue_tail(inputq, m->event_msg);
- *usr_wakeup = true;
- list_del_init(&m->congested);
+ list_del_init(&m->list);
+ tipc_group_open(m, usr_wakeup);
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
+ tipc_group_create_event(grp, m, TIPC_WITHDRAWN,
+ m->bc_syncpt, inputq);
return;
case GRP_ADV_MSG:
if (!m)
return;
m->window += msg_adv_win(hdr);
- *usr_wakeup = m->usr_pending;
- m->usr_pending = false;
- list_del_init(&m->congested);
+ tipc_group_open(m, usr_wakeup);
return;
case GRP_ACK_MSG:
if (!m)
return;
m->bc_acked = msg_grp_bc_acked(hdr);
if (--grp->bc_ackers)
- break;
+ return;
+ list_del_init(&m->small_win);
+ *m->group->open = true;
*usr_wakeup = true;
- m->usr_pending = false;
+ tipc_group_update_member(m, 0);
return;
case GRP_RECLAIM_MSG:
if (!m)
return;
- *usr_wakeup = m->usr_pending;
- m->usr_pending = false;
tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
m->window = ADV_IDLE;
+ tipc_group_open(m, usr_wakeup);
return;
case GRP_REMIT_MSG:
if (!m || m->state != MBR_RECLAIMING)
return;
- list_del_init(&m->list);
- grp->active_cnt--;
remitted = msg_grp_remitted(hdr);
/* Messages preceding the REMIT still in receive queue */
if (m->advertised > remitted) {
m->state = MBR_REMITTED;
in_flight = m->advertised - remitted;
+ m->advertised = ADV_IDLE + in_flight;
+ return;
}
- /* All messages preceding the REMIT have been read */
- if (m->advertised <= remitted) {
- m->state = MBR_JOINED;
- in_flight = 0;
- }
- /* ..and the REMIT overtaken by more messages => re-advertise */
+ /* This should never happen */
if (m->advertised < remitted)
- tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ pr_warn_ratelimited("Unexpected REMIT msg\n");
- m->advertised = ADV_IDLE + in_flight;
+ /* All messages preceding the REMIT have been read */
+ m->state = MBR_JOINED;
+ grp->active_cnt--;
+ m->advertised = ADV_IDLE;
/* Set oldest pending member to active and advertise */
if (list_empty(&grp->pending))
@@ -781,11 +850,10 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
void tipc_group_member_evt(struct tipc_group *grp,
bool *usr_wakeup,
int *sk_rcvbuf,
- struct sk_buff *skb,
+ struct tipc_msg *hdr,
struct sk_buff_head *inputq,
struct sk_buff_head *xmitq)
{
- struct tipc_msg *hdr = buf_msg(skb);
struct tipc_event *evt = (void *)msg_data(hdr);
u32 instance = evt->found_lower;
u32 node = evt->port.node;
@@ -793,79 +861,59 @@ void tipc_group_member_evt(struct tipc_group *grp,
int event = evt->event;
struct tipc_member *m;
struct net *net;
- bool node_up;
u32 self;
if (!grp)
- goto drop;
+ return;
net = grp->net;
self = tipc_own_addr(net);
if (!grp->loopback && node == self && port == grp->portid)
- goto drop;
-
- /* Convert message before delivery to user */
- msg_set_hdr_sz(hdr, GROUP_H_SIZE);
- msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
- msg_set_type(hdr, TIPC_GRP_MEMBER_EVT);
- msg_set_origport(hdr, port);
- msg_set_orignode(hdr, node);
- msg_set_nametype(hdr, grp->type);
- msg_set_grp_evt(hdr, event);
+ return;
m = tipc_group_find_member(grp, node, port);
- if (event == TIPC_PUBLISHED) {
- if (!m)
- m = tipc_group_create_member(grp, node, port,
- MBR_DISCOVERED);
- if (!m)
- goto drop;
-
- /* Hold back event if JOIN message not yet received */
- if (m->state == MBR_DISCOVERED) {
- m->event_msg = skb;
- m->state = MBR_PUBLISHED;
- } else {
- msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
- __skb_queue_tail(inputq, skb);
- m->state = MBR_JOINED;
- *usr_wakeup = true;
- m->usr_pending = false;
+ switch (event) {
+ case TIPC_PUBLISHED:
+ /* Send and wait for arrival of JOIN message if necessary */
+ if (!m) {
+ m = tipc_group_create_member(grp, node, port, instance,
+ MBR_PUBLISHED);
+ if (!m)
+ break;
+ tipc_group_update_member(m, 0);
+ tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
+ break;
}
+
+ if (m->state != MBR_JOINING)
+ break;
+
+ /* Member can be taken into service */
m->instance = instance;
- TIPC_SKB_CB(skb)->orig_member = m->instance;
+ m->state = MBR_JOINED;
+ tipc_group_open(m, usr_wakeup);
+ tipc_group_update_member(m, 0);
tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
- if (m->window < ADV_IDLE)
- tipc_group_update_member(m, 0);
- else
- list_del_init(&m->congested);
- } else if (event == TIPC_WITHDRAWN) {
+ tipc_group_create_event(grp, m, TIPC_PUBLISHED,
+ m->bc_syncpt, inputq);
+ break;
+ case TIPC_WITHDRAWN:
if (!m)
- goto drop;
+ break;
- TIPC_SKB_CB(skb)->orig_member = m->instance;
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
+ list_del_init(&m->list);
+ tipc_group_open(m, usr_wakeup);
- *usr_wakeup = true;
- m->usr_pending = false;
- node_up = tipc_node_is_up(net, node);
-
- /* Hold back event if more messages might be expected */
- if (m->state != MBR_LEAVING && node_up) {
- m->event_msg = skb;
- tipc_group_decr_active(grp, m);
- m->state = MBR_LEAVING;
- } else {
- if (node_up)
- msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
- else
- msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
- __skb_queue_tail(inputq, skb);
- }
- list_del_init(&m->congested);
+ /* Only send event if no LEAVE message can be expected */
+ if (!tipc_node_is_up(net, node))
+ tipc_group_create_event(grp, m, TIPC_WITHDRAWN,
+ m->bc_rcv_nxt, inputq);
+ break;
+ default:
+ break;
}
*sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
- return;
-drop:
- kfree_skb(skb);
}
diff --git a/net/tipc/group.h b/net/tipc/group.h
index d525e1cd7de5..5996af6e9f1d 100644
--- a/net/tipc/group.h
+++ b/net/tipc/group.h
@@ -43,9 +43,12 @@ struct tipc_member;
struct tipc_msg;
struct tipc_group *tipc_group_create(struct net *net, u32 portid,
- struct tipc_group_req *mreq);
+ struct tipc_group_req *mreq,
+ bool *group_is_open);
+void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcv_buf);
void tipc_group_delete(struct net *net, struct tipc_group *grp);
-void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port);
+void tipc_group_add_member(struct tipc_group *grp, u32 node,
+ u32 port, u32 instance);
struct tipc_nlist *tipc_group_dests(struct tipc_group *grp);
void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
int *scope);
@@ -54,7 +57,7 @@ void tipc_group_filter_msg(struct tipc_group *grp,
struct sk_buff_head *inputq,
struct sk_buff_head *xmitq);
void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup,
- int *sk_rcvbuf, struct sk_buff *skb,
+ int *sk_rcvbuf, struct tipc_msg *hdr,
struct sk_buff_head *inputq,
struct sk_buff_head *xmitq);
void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup,
@@ -69,5 +72,4 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
u32 port, struct sk_buff_head *xmitq);
u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
void tipc_group_update_member(struct tipc_member *m, int len);
-int tipc_group_size(struct tipc_group *grp);
#endif
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 6bce0b1117bd..2d6b2aed30e0 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -483,7 +483,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
/**
* tipc_link_bc_create - create new link to be used for broadcast
* @n: pointer to associated node
- * @mtu: mtu to be used
+ * @mtu: mtu to be used initially if no peers
* @window: send window to be used
* @inputq: queue to put messages ready for delivery
* @namedq: queue to put binding table update messages ready for delivery
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 8e884ed06d4b..32dc33a94bc7 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id)
{
struct tipc_net *tn = tipc_net(net);
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
- struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *self;
struct tipc_peer *peer, *tmp;
+ if (!mon)
+ return;
+
+ self = get_self(net, bearer_id);
write_lock_bh(&mon->lock);
tn->monitors[bearer_id] = NULL;
list_for_each_entry_safe(peer, tmp, &self->list, list) {
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index b0d07b35909d..55d8ba92291d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -251,20 +251,23 @@ bool tipc_msg_validate(struct sk_buff **_skb)
* @pktmax: Max packet size that can be used
* @list: Buffer or chain of buffers to be returned to caller
*
+ * Note that the recursive call we are making here is safe, since it can
+ * logically go only one further level down.
+ *
* Returns message data size or errno: -ENOMEM, -EFAULT
*/
-int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
- int offset, int dsz, int pktmax, struct sk_buff_head *list)
+int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
+ int dsz, int pktmax, struct sk_buff_head *list)
{
int mhsz = msg_hdr_sz(mhdr);
+ struct tipc_msg pkthdr;
int msz = mhsz + dsz;
- int pktno = 1;
- int pktsz;
int pktrem = pktmax;
- int drem = dsz;
- struct tipc_msg pkthdr;
struct sk_buff *skb;
+ int drem = dsz;
+ int pktno = 1;
char *pktpos;
+ int pktsz;
int rc;
msg_set_size(mhdr, msz);
@@ -272,8 +275,18 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
/* No fragmentation needed? */
if (likely(msz <= pktmax)) {
skb = tipc_buf_acquire(msz, GFP_KERNEL);
- if (unlikely(!skb))
+
+ /* Fall back to smaller MTU if node local message */
+ if (unlikely(!skb)) {
+ if (pktmax != MAX_MSG_SIZE)
+ return -ENOMEM;
+ rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list);
+ if (rc != dsz)
+ return rc;
+ if (tipc_msg_assemble(list))
+ return dsz;
return -ENOMEM;
+ }
skb_orphan(skb);
__skb_queue_tail(list, skb);
skb_copy_to_linear_data(skb, mhdr, mhsz);
@@ -589,6 +602,30 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
return true;
}
+/* tipc_msg_assemble() - assemble chain of fragments into one message
+ */
+bool tipc_msg_assemble(struct sk_buff_head *list)
+{
+ struct sk_buff *skb, *tmp = NULL;
+
+ if (skb_queue_len(list) == 1)
+ return true;
+
+ while ((skb = __skb_dequeue(list))) {
+ skb->next = NULL;
+ if (tipc_buf_append(&tmp, &skb)) {
+ __skb_queue_tail(list, skb);
+ return true;
+ }
+ if (!tmp)
+ break;
+ }
+ __skb_queue_purge(list);
+ __skb_queue_head_init(list);
+ pr_warn("Failed do assemble buffer\n");
+ return false;
+}
+
/* tipc_msg_reassemble() - clone a buffer chain of fragments and
* reassemble the clones into one message
*/
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 3e4384c222f7..b4ba1b4f9ae7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -98,7 +98,7 @@ struct plist;
#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
-
+#define FB_MTU 3744
#define TIPC_MEDIA_INFO_OFFSET 5
struct tipc_skb_cb {
@@ -943,6 +943,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
+bool tipc_msg_assemble(struct sk_buff_head *list);
bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
struct sk_buff_head *cpy);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index b3829bcf63c7..ed0457cc99d6 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -328,7 +328,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
TIPC_PUBLISHED, publ->ref,
- publ->node, created_subseq);
+ publ->node, publ->scope,
+ created_subseq);
}
return publ;
}
@@ -398,19 +399,21 @@ found:
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
TIPC_WITHDRAWN, publ->ref,
- publ->node, removed_subseq);
+ publ->node, publ->scope,
+ removed_subseq);
}
return publ;
}
/**
- * tipc_nameseq_subscribe - attach a subscription, and issue
- * the prescribed number of events if there is any sub-
+ * tipc_nameseq_subscribe - attach a subscription, and optionally
+ * issue the prescribed number of events if there is any sub-
* sequence overlapping with the requested sequence
*/
static void tipc_nameseq_subscribe(struct name_seq *nseq,
- struct tipc_subscription *s)
+ struct tipc_subscription *s,
+ bool status)
{
struct sub_seq *sseq = nseq->sseqs;
struct tipc_name_seq ns;
@@ -420,7 +423,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
tipc_subscrp_get(s);
list_add(&s->nameseq_list, &nseq->subscriptions);
- if (!sseq)
+ if (!status || !sseq)
return;
while (sseq != &nseq->sseqs[nseq->first_free]) {
@@ -434,6 +437,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
sseq->upper,
TIPC_PUBLISHED,
crs->ref, crs->node,
+ crs->scope,
must_report);
must_report = 0;
}
@@ -597,7 +601,7 @@ not_found:
return ref;
}
-bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
struct list_head *dsts, int *dstcnt, u32 exclude,
bool all)
{
@@ -607,9 +611,6 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
struct name_seq *seq;
struct sub_seq *sseq;
- if (!tipc_in_scope(domain, self))
- return false;
-
*dstcnt = 0;
rcu_read_lock();
seq = nametbl_find_seq(net, type);
@@ -620,7 +621,7 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
if (likely(sseq)) {
info = sseq->info;
list_for_each_entry(publ, &info->zone_list, zone_list) {
- if (!tipc_in_scope(domain, publ->node))
+ if (publ->scope != scope)
continue;
if (publ->ref == exclude && publ->node == self)
continue;
@@ -638,13 +639,14 @@ exit:
return !list_empty(dsts);
}
-int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
- u32 limit, struct list_head *dports)
+int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+ u32 scope, bool exact, struct list_head *dports)
{
- struct name_seq *seq;
- struct sub_seq *sseq;
struct sub_seq *sseq_stop;
struct name_info *info;
+ struct publication *p;
+ struct name_seq *seq;
+ struct sub_seq *sseq;
int res = 0;
rcu_read_lock();
@@ -656,15 +658,12 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
sseq_stop = seq->sseqs + seq->first_free;
for (; sseq != sseq_stop; sseq++) {
- struct publication *publ;
-
if (sseq->lower > upper)
break;
-
info = sseq->info;
- list_for_each_entry(publ, &info->node_list, node_list) {
- if (publ->scope <= limit)
- tipc_dest_push(dports, 0, publ->ref);
+ list_for_each_entry(p, &info->node_list, node_list) {
+ if (p->scope == scope || (!exact && p->scope < scope))
+ tipc_dest_push(dports, 0, p->ref);
}
if (info->cluster_list_size != info->node_list_size)
@@ -681,8 +680,7 @@ exit:
* - Determines if any node local ports overlap
*/
void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
- u32 upper, u32 domain,
- struct tipc_nlist *nodes)
+ u32 upper, struct tipc_nlist *nodes)
{
struct sub_seq *sseq, *stop;
struct publication *publ;
@@ -700,8 +698,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
for (; sseq != stop && sseq->lower <= upper; sseq++) {
info = sseq->info;
list_for_each_entry(publ, &info->zone_list, zone_list) {
- if (tipc_in_scope(domain, publ->node))
- tipc_nlist_add(nodes, publ->node);
+ tipc_nlist_add(nodes, publ->node);
}
}
spin_unlock_bh(&seq->lock);
@@ -712,7 +709,7 @@ exit:
/* tipc_nametbl_build_group - build list of communication group members
*/
void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
- u32 type, u32 domain)
+ u32 type, u32 scope)
{
struct sub_seq *sseq, *stop;
struct name_info *info;
@@ -730,9 +727,9 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
for (; sseq != stop; sseq++) {
info = sseq->info;
list_for_each_entry(p, &info->zone_list, zone_list) {
- if (!tipc_in_scope(domain, p->node))
+ if (p->scope != scope)
continue;
- tipc_group_add_member(grp, p->node, p->ref);
+ tipc_group_add_member(grp, p->node, p->ref, p->lower);
}
}
spin_unlock_bh(&seq->lock);
@@ -811,7 +808,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
/**
* tipc_nametbl_subscribe - add a subscription object to the name table
*/
-void tipc_nametbl_subscribe(struct tipc_subscription *s)
+void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
{
struct tipc_net *tn = net_generic(s->net, tipc_net_id);
u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
@@ -825,7 +822,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s)
seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
if (seq) {
spin_lock_bh(&seq->lock);
- tipc_nameseq_subscribe(seq, s);
+ tipc_nameseq_subscribe(seq, s, status);
spin_unlock_bh(&seq->lock);
} else {
tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 71926e429446..f56e7cb3d436 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -100,13 +100,12 @@ struct name_table {
int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
-int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
- u32 limit, struct list_head *dports);
+int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+ u32 scope, bool exact, struct list_head *dports);
void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
u32 type, u32 domain);
void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
- u32 upper, u32 domain,
- struct tipc_nlist *nodes);
+ u32 upper, struct tipc_nlist *nodes);
bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
struct list_head *dsts, int *dstcnt, u32 exclude,
bool all);
@@ -121,7 +120,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
u32 lower, u32 node, u32 ref,
u32 key);
-void tipc_nametbl_subscribe(struct tipc_subscription *s);
+void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status);
void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
void tipc_nametbl_stop(struct net *net);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 507017fe0f1b..9036d8756e73 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1880,36 +1880,38 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
if (strcmp(name, tipc_bclink_name) == 0) {
err = tipc_nl_add_bc_link(net, &msg);
- if (err) {
- nlmsg_free(msg.skb);
- return err;
- }
+ if (err)
+ goto err_free;
} else {
int bearer_id;
struct tipc_node *node;
struct tipc_link *link;
node = tipc_node_find_by_name(net, name, &bearer_id);
- if (!node)
- return -EINVAL;
+ if (!node) {
+ err = -EINVAL;
+ goto err_free;
+ }
tipc_node_read_lock(node);
link = node->links[bearer_id].link;
if (!link) {
tipc_node_read_unlock(node);
- nlmsg_free(msg.skb);
- return -EINVAL;
+ err = -EINVAL;
+ goto err_free;
}
err = __tipc_nl_add_link(net, &msg, link, 0);
tipc_node_read_unlock(node);
- if (err) {
- nlmsg_free(msg.skb);
- return err;
- }
+ if (err)
+ goto err_free;
}
return genlmsg_reply(msg.skb, info);
+
+err_free:
+ nlmsg_free(msg.skb);
+ return err;
}
int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/tipc/server.c b/net/tipc/server.c
index acaef80fb88c..df0c563c90cd 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -132,10 +132,11 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
spin_lock_bh(&s->idr_lock);
con = idr_find(&s->conn_idr, conid);
- if (con && test_bit(CF_CONNECTED, &con->flags))
- conn_get(con);
- else
- con = NULL;
+ if (con) {
+ if (!test_bit(CF_CONNECTED, &con->flags) ||
+ !kref_get_unless_zero(&con->kref))
+ con = NULL;
+ }
spin_unlock_bh(&s->idr_lock);
return con;
}
@@ -183,35 +184,28 @@ static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
write_unlock_bh(&sk->sk_callback_lock);
}
-static void tipc_unregister_callbacks(struct tipc_conn *con)
-{
- struct sock *sk = con->sock->sk;
-
- write_lock_bh(&sk->sk_callback_lock);
- sk->sk_user_data = NULL;
- write_unlock_bh(&sk->sk_callback_lock);
-}
-
static void tipc_close_conn(struct tipc_conn *con)
{
struct tipc_server *s = con->server;
+ struct sock *sk = con->sock->sk;
+ bool disconnect = false;
- if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
- if (con->sock)
- tipc_unregister_callbacks(con);
-
+ write_lock_bh(&sk->sk_callback_lock);
+ disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
+ if (disconnect) {
+ sk->sk_user_data = NULL;
if (con->conid)
s->tipc_conn_release(con->conid, con->usr_data);
-
- /* We shouldn't flush pending works as we may be in the
- * thread. In fact the races with pending rx/tx work structs
- * are harmless for us here as we have already deleted this
- * connection from server connection list.
- */
- if (con->sock)
- kernel_sock_shutdown(con->sock, SHUT_RDWR);
- conn_put(con);
}
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ /* Handle concurrent calls from sending and receiving threads */
+ if (!disconnect)
+ return;
+
+ /* Don't flush pending works, -just let them expire */
+ kernel_sock_shutdown(con->sock, SHUT_RDWR);
+ conn_put(con);
}
static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
@@ -248,9 +242,10 @@ static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
static int tipc_receive_from_sock(struct tipc_conn *con)
{
- struct msghdr msg = {};
struct tipc_server *s = con->server;
+ struct sock *sk = con->sock->sk;
struct sockaddr_tipc addr;
+ struct msghdr msg = {};
struct kvec iov;
void *buf;
int ret;
@@ -264,19 +259,22 @@ static int tipc_receive_from_sock(struct tipc_conn *con)
iov.iov_base = buf;
iov.iov_len = s->max_rcvbuf_size;
msg.msg_name = &addr;
- ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
- MSG_DONTWAIT);
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+ ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
if (ret <= 0) {
kmem_cache_free(s->rcvbuf_cache, buf);
goto out_close;
}
- s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, &addr,
- con->usr_data, buf, ret);
-
+ read_lock_bh(&sk->sk_callback_lock);
+ if (test_bit(CF_CONNECTED, &con->flags))
+ ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid,
+ &addr, con->usr_data, buf, ret);
+ read_unlock_bh(&sk->sk_callback_lock);
kmem_cache_free(s->rcvbuf_cache, buf);
-
- return 0;
+ if (ret < 0)
+ tipc_conn_terminate(s, con->conid);
+ return ret;
out_close:
if (ret != -EWOULDBLOCK)
@@ -314,6 +312,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
newcon->usr_data = s->tipc_conn_new(newcon->conid);
if (!newcon->usr_data) {
sock_release(newsock);
+ conn_put(newcon);
return -ENOMEM;
}
@@ -488,8 +487,8 @@ void tipc_conn_terminate(struct tipc_server *s, int conid)
}
}
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
- u32 lower, u32 upper, int *conid)
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ u32 upper, u32 filter, int *conid)
{
struct tipc_subscriber *scbr;
struct tipc_subscr sub;
@@ -500,7 +499,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
sub.seq.lower = lower;
sub.seq.upper = upper;
sub.timeout = TIPC_WAIT_FOREVER;
- sub.filter = TIPC_SUB_PORTS;
+ sub.filter = filter;
*(u32 *)&sub.usr_handle = port;
con = tipc_alloc_conn(tipc_topsrv(net));
@@ -511,7 +510,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
s = con->server;
scbr = s->tipc_conn_new(*conid);
if (!scbr) {
- tipc_close_conn(con);
+ conn_put(con);
return false;
}
@@ -524,11 +523,17 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
{
struct tipc_conn *con;
+ struct tipc_server *srv;
con = tipc_conn_lookup(tipc_topsrv(net), conid);
if (!con)
return;
- tipc_close_conn(con);
+
+ test_and_clear_bit(CF_CONNECTED, &con->flags);
+ srv = con->server;
+ if (con->conid)
+ srv->tipc_conn_release(con->conid, con->usr_data);
+ conn_put(con);
conn_put(con);
}
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 2113c9192633..64df7513cd70 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -41,6 +41,9 @@
#include <net/net_namespace.h>
#define TIPC_SERVER_NAME_LEN 32
+#define TIPC_SUB_CLUSTER_SCOPE 0x20
+#define TIPC_SUB_NODE_SCOPE 0x40
+#define TIPC_SUB_NO_STATUS 0x80
/**
* struct tipc_server - TIPC server structure
@@ -71,9 +74,9 @@ struct tipc_server {
int max_rcvbuf_size;
void *(*tipc_conn_new)(int conid);
void (*tipc_conn_release)(int conid, void *usr_data);
- void (*tipc_conn_recvmsg)(struct net *net, int conid,
- struct sockaddr_tipc *addr, void *usr_data,
- void *buf, size_t len);
+ int (*tipc_conn_recvmsg)(struct net *net, int conid,
+ struct sockaddr_tipc *addr, void *usr_data,
+ void *buf, size_t len);
struct sockaddr_tipc *saddr;
char name[TIPC_SERVER_NAME_LEN];
int imp;
@@ -83,8 +86,8 @@ struct tipc_server {
int tipc_conn_sendmsg(struct tipc_server *s, int conid,
struct sockaddr_tipc *addr, void *data, size_t len);
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
- u32 lower, u32 upper, int *conid);
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ u32 upper, u32 filter, int *conid);
void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
/**
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 5d18c0caa92b..163f3a547501 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -116,6 +116,7 @@ struct tipc_sock {
struct tipc_mc_method mc_method;
struct rcu_head rcu;
struct tipc_group *group;
+ bool group_is_open;
};
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
@@ -710,13 +711,12 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
* imply that the operation will succeed, merely that it should be performed
* and will not block.
*/
-static unsigned int tipc_poll(struct file *file, struct socket *sock,
+static __poll_t tipc_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_group *grp = tsk->group;
- u32 revents = 0;
+ __poll_t revents = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
@@ -727,18 +727,17 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
switch (sk->sk_state) {
case TIPC_ESTABLISHED:
+ case TIPC_CONNECTING:
if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
revents |= POLLOUT;
/* fall thru' */
case TIPC_LISTEN:
- case TIPC_CONNECTING:
if (!skb_queue_empty(&sk->sk_receive_queue))
revents |= POLLIN | POLLRDNORM;
break;
case TIPC_OPEN:
- if (!grp || tipc_group_size(grp))
- if (!tsk->cong_link_cnt)
- revents |= POLLOUT;
+ if (tsk->group_is_open && !tsk->cong_link_cnt)
+ revents |= POLLOUT;
if (!tipc_sk_type_connectionless(sk))
break;
if (skb_queue_empty(&sk->sk_receive_queue))
@@ -772,7 +771,6 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
struct net *net = sock_net(sk);
int mtu = tipc_bcast_get_mtu(net);
struct tipc_mc_method *method = &tsk->mc_method;
- u32 domain = addr_domain(net, TIPC_CLUSTER_SCOPE);
struct sk_buff_head pkts;
struct tipc_nlist dsts;
int rc;
@@ -788,7 +786,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
/* Lookup destination nodes */
tipc_nlist_init(&dsts, tipc_own_addr(net));
tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
- seq->upper, domain, &dsts);
+ seq->upper, &dsts);
if (!dsts.local && !dsts.remote)
return -EHOSTUNREACH;
@@ -928,21 +926,22 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
struct list_head *cong_links = &tsk->cong_links;
int blks = tsk_blocks(GROUP_H_SIZE + dlen);
struct tipc_group *grp = tsk->group;
+ struct tipc_msg *hdr = &tsk->phdr;
struct tipc_member *first = NULL;
struct tipc_member *mbr = NULL;
struct net *net = sock_net(sk);
u32 node, port, exclude;
- u32 type, inst, domain;
struct list_head dsts;
+ u32 type, inst, scope;
int lookups = 0;
int dstcnt, rc;
bool cong;
INIT_LIST_HEAD(&dsts);
- type = dest->addr.name.name.type;
+ type = msg_nametype(hdr);
inst = dest->addr.name.name.instance;
- domain = addr_domain(net, dest->scope);
+ scope = msg_lookup_scope(hdr);
exclude = tipc_group_exclude(grp);
while (++lookups < 4) {
@@ -950,7 +949,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
/* Look for a non-congested destination member, if any */
while (1) {
- if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts,
+ if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
&dstcnt, exclude, false))
return -EHOSTUNREACH;
tipc_dest_pop(&dsts, &node, &port);
@@ -1079,22 +1078,23 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
{
struct sock *sk = sock->sk;
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
- struct tipc_name_seq *seq = &dest->addr.nameseq;
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_group *grp = tsk->group;
+ struct tipc_msg *hdr = &tsk->phdr;
struct net *net = sock_net(sk);
- u32 domain, exclude, dstcnt;
+ u32 type, inst, scope, exclude;
struct list_head dsts;
+ u32 dstcnt;
INIT_LIST_HEAD(&dsts);
- if (seq->lower != seq->upper)
- return -ENOTSUPP;
-
- domain = addr_domain(net, dest->scope);
+ type = msg_nametype(hdr);
+ inst = dest->addr.name.name.instance;
+ scope = msg_lookup_scope(hdr);
exclude = tipc_group_exclude(grp);
- if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain,
- &dsts, &dstcnt, exclude, true))
+
+ if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
+ &dstcnt, exclude, true))
return -EHOSTUNREACH;
if (dstcnt == 1) {
@@ -1116,49 +1116,64 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
struct sk_buff_head *inputq)
{
- u32 scope = TIPC_CLUSTER_SCOPE;
u32 self = tipc_own_addr(net);
+ u32 type, lower, upper, scope;
struct sk_buff *skb, *_skb;
- u32 lower = 0, upper = ~0;
- struct sk_buff_head tmpq;
u32 portid, oport, onode;
+ struct sk_buff_head tmpq;
struct list_head dports;
- struct tipc_msg *msg;
- int user, mtyp, hsz;
+ struct tipc_msg *hdr;
+ int user, mtyp, hlen;
+ bool exact;
__skb_queue_head_init(&tmpq);
INIT_LIST_HEAD(&dports);
skb = tipc_skb_peek(arrvq, &inputq->lock);
for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
- msg = buf_msg(skb);
- user = msg_user(msg);
- mtyp = msg_type(msg);
+ hdr = buf_msg(skb);
+ user = msg_user(hdr);
+ mtyp = msg_type(hdr);
+ hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
+ oport = msg_origport(hdr);
+ onode = msg_orignode(hdr);
+ type = msg_nametype(hdr);
+
if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
spin_lock_bh(&inputq->lock);
if (skb_peek(arrvq) == skb) {
__skb_dequeue(arrvq);
__skb_queue_tail(inputq, skb);
}
- refcount_dec(&skb->users);
+ kfree_skb(skb);
spin_unlock_bh(&inputq->lock);
continue;
}
- hsz = skb_headroom(skb) + msg_hdr_sz(msg);
- oport = msg_origport(msg);
- onode = msg_orignode(msg);
- if (onode == self)
- scope = TIPC_NODE_SCOPE;
-
- /* Create destination port list and message clones: */
- if (!msg_in_group(msg)) {
- lower = msg_namelower(msg);
- upper = msg_nameupper(msg);
+
+ /* Group messages require exact scope match */
+ if (msg_in_group(hdr)) {
+ lower = 0;
+ upper = ~0;
+ scope = msg_lookup_scope(hdr);
+ exact = true;
+ } else {
+ /* TIPC_NODE_SCOPE means "any scope" in this context */
+ if (onode == self)
+ scope = TIPC_NODE_SCOPE;
+ else
+ scope = TIPC_CLUSTER_SCOPE;
+ exact = false;
+ lower = msg_namelower(hdr);
+ upper = msg_nameupper(hdr);
}
- tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper,
- scope, &dports);
+
+ /* Create destination port list: */
+ tipc_nametbl_mc_lookup(net, type, lower, upper,
+ scope, exact, &dports);
+
+ /* Clone message per destination */
while (tipc_dest_pop(&dports, NULL, &portid)) {
- _skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
+ _skb = __pskb_copy(skb, hlen, GFP_ATOMIC);
if (_skb) {
msg_set_destport(buf_msg(_skb), portid);
__skb_queue_tail(&tmpq, _skb);
@@ -1933,8 +1948,7 @@ static void tipc_sk_proto_rcv(struct sock *sk,
break;
case TOP_SRV:
tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
- skb, inputq, xmitq);
- skb = NULL;
+ hdr, inputq, xmitq);
break;
default:
break;
@@ -2640,9 +2654,7 @@ void tipc_sk_reinit(struct net *net)
rhashtable_walk_enter(&tn->sk_rht, &iter);
do {
- tsk = ERR_PTR(rhashtable_walk_start(&iter));
- if (IS_ERR(tsk))
- goto walk_stop;
+ rhashtable_walk_start(&iter);
while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
spin_lock_bh(&tsk->sk.sk_lock.slock);
@@ -2651,7 +2663,7 @@ void tipc_sk_reinit(struct net *net)
msg_set_orignode(msg, tn->own_addr);
spin_unlock_bh(&tsk->sk.sk_lock.slock);
}
-walk_stop:
+
rhashtable_walk_stop(&iter);
} while (tsk == ERR_PTR(-EAGAIN));
}
@@ -2734,7 +2746,6 @@ void tipc_sk_rht_destroy(struct net *net)
static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
{
struct net *net = sock_net(&tsk->sk);
- u32 domain = addr_domain(net, mreq->scope);
struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_name_seq seq;
@@ -2742,9 +2753,11 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
if (mreq->type < TIPC_RESERVED_TYPES)
return -EACCES;
+ if (mreq->scope > TIPC_NODE_SCOPE)
+ return -EINVAL;
if (grp)
return -EACCES;
- grp = tipc_group_create(net, tsk->portid, mreq);
+ grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open);
if (!grp)
return -ENOMEM;
tsk->group = grp;
@@ -2754,16 +2767,17 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
seq.type = mreq->type;
seq.lower = mreq->instance;
seq.upper = seq.lower;
- tipc_nametbl_build_group(net, grp, mreq->type, domain);
+ tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
rc = tipc_sk_publish(tsk, mreq->scope, &seq);
if (rc) {
tipc_group_delete(net, grp);
tsk->group = NULL;
+ return rc;
}
-
- /* Eliminate any risk that a broadcast overtakes the sent JOIN */
+ /* Eliminate any risk that a broadcast overtakes sent JOINs */
tsk->mc_method.rcast = true;
tsk->mc_method.mandatory = true;
+ tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf);
return rc;
}
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 251065dfd8df..68e26470c516 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -118,15 +118,19 @@ void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
u32 found_upper, u32 event, u32 port_ref,
- u32 node, int must)
+ u32 node, u32 scope, int must)
{
+ u32 filter = htohl(sub->evt.s.filter, sub->swap);
struct tipc_name_seq seq;
tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
return;
- if (!must &&
- !(htohl(sub->evt.s.filter, sub->swap) & TIPC_SUB_PORTS))
+ if (!must && !(filter & TIPC_SUB_PORTS))
+ return;
+ if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
+ return;
+ if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
return;
tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
@@ -285,21 +289,21 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net,
return sub;
}
-static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
- struct tipc_subscriber *subscriber, int swap)
+static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
+ struct tipc_subscriber *subscriber, int swap,
+ bool status)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_subscription *sub = NULL;
u32 timeout;
sub = tipc_subscrp_create(net, s, swap);
if (!sub)
- return tipc_conn_terminate(tn->topsrv, subscriber->conid);
+ return -1;
spin_lock_bh(&subscriber->lock);
list_add(&sub->subscrp_list, &subscriber->subscrp_list);
sub->subscriber = subscriber;
- tipc_nametbl_subscribe(sub);
+ tipc_nametbl_subscribe(sub, status);
tipc_subscrb_get(subscriber);
spin_unlock_bh(&subscriber->lock);
@@ -308,6 +312,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
if (timeout != TIPC_WAIT_FOREVER)
mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
+ return 0;
}
/* Handle one termination request for the subscriber */
@@ -317,12 +322,13 @@ static void tipc_subscrb_release_cb(int conid, void *usr_data)
}
/* Handle one request to create a new subscription for the subscriber */
-static void tipc_subscrb_rcv_cb(struct net *net, int conid,
- struct sockaddr_tipc *addr, void *usr_data,
- void *buf, size_t len)
+static int tipc_subscrb_rcv_cb(struct net *net, int conid,
+ struct sockaddr_tipc *addr, void *usr_data,
+ void *buf, size_t len)
{
struct tipc_subscriber *subscriber = usr_data;
struct tipc_subscr *s = (struct tipc_subscr *)buf;
+ bool status;
int swap;
/* Determine subscriber's endianness */
@@ -332,10 +338,11 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
/* Detect & process a subscription cancellation request */
if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
- return tipc_subscrp_cancel(s, subscriber);
+ tipc_subscrp_cancel(s, subscriber);
+ return 0;
}
-
- tipc_subscrp_subscribe(net, s, subscriber, swap);
+ status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
+ return tipc_subscrp_subscribe(net, s, subscriber, swap, status);
}
/* Handle one request to establish a new subscriber */
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index ee52957dc952..f3edca775d9f 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -71,7 +71,7 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
u32 found_upper);
void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
u32 found_lower, u32 found_upper, u32 event,
- u32 port_ref, u32 node, int must);
+ u32 port_ref, u32 node, u32 scope, int must);
void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
struct tipc_name_seq *out);
u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ecca64fc6a6f..3deabcab4882 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
goto rcu_out;
}
- tipc_rcv(sock_net(sk), skb, b);
- rcu_read_unlock();
- return 0;
-
rcu_out:
rcu_read_unlock();
out:
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index e07ee3ae0023..736719c8314e 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -367,8 +367,10 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
crypto_info = &ctx->crypto_send;
/* Currently we don't support set crypto info more than one time */
- if (TLS_CRYPTO_INFO_READY(crypto_info))
+ if (TLS_CRYPTO_INFO_READY(crypto_info)) {
+ rc = -EBUSY;
goto out;
+ }
rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
if (rc) {
@@ -386,7 +388,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
case TLS_CIPHER_AES_GCM_128: {
if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
rc = -EINVAL;
- goto out;
+ goto err_crypto_info;
}
rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
optlen - sizeof(*crypto_info));
@@ -398,7 +400,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
}
default:
rc = -EINVAL;
- goto out;
+ goto err_crypto_info;
}
/* currently SW is default, we will have ethtool in future */
@@ -454,6 +456,15 @@ static int tls_init(struct sock *sk)
struct tls_context *ctx;
int rc = 0;
+ /* The TLS ulp is currently supported only for TCP sockets
+ * in ESTABLISHED state.
+ * Supporting sockets in LISTEN state will require us
+ * to modify the accept implementation to clone rather then
+ * share the ulp context.
+ */
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -ENOTSUPP;
+
/* allocate tls context */
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx) {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 73d19210dd49..f26376e954ae 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -214,7 +214,11 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out,
data_len, tls_ctx->iv);
- rc = crypto_aead_encrypt(aead_req);
+
+ aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &ctx->async_wait);
+
+ rc = crypto_wait_req(crypto_aead_encrypt(aead_req), &ctx->async_wait);
ctx->sg_encrypted_data[0].offset -= tls_ctx->prepend_size;
ctx->sg_encrypted_data[0].length += tls_ctx->prepend_size;
@@ -391,7 +395,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
while (msg_data_left(msg)) {
if (sk->sk_err) {
- ret = sk->sk_err;
+ ret = -sk->sk_err;
goto send_end;
}
@@ -544,7 +548,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
size_t copy, required_size;
if (sk->sk_err) {
- ret = sk->sk_err;
+ ret = -sk->sk_err;
goto sendpage_end;
}
@@ -577,6 +581,8 @@ alloc_payload:
get_page(page);
sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem;
sg_set_page(sg, page, copy, offset);
+ sg_unmark_end(sg);
+
ctx->sg_plaintext_num_elem++;
sk_mem_charge(sk, copy);
@@ -663,6 +669,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
goto out;
}
+ crypto_init_wait(&sw_ctx->async_wait);
+
ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
crypto_info = &ctx->crypto_send;
@@ -681,18 +689,17 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
}
default:
rc = -EINVAL;
- goto out;
+ goto free_priv;
}
ctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
ctx->tag_size = tag_size;
ctx->overhead_size = ctx->prepend_size + ctx->tag_size;
ctx->iv_size = iv_size;
- ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
- GFP_KERNEL);
+ ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL);
if (!ctx->iv) {
rc = -ENOMEM;
- goto out;
+ goto free_priv;
}
memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
@@ -740,7 +747,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size);
if (!rc)
- goto out;
+ return 0;
free_aead:
crypto_free_aead(sw_ctx->aead_send);
@@ -751,6 +758,9 @@ free_rec_seq:
free_iv:
kfree(ctx->iv);
ctx->iv = NULL;
+free_priv:
+ kfree(ctx->priv_ctx);
+ ctx->priv_ctx = NULL;
out:
return rc;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a9ee634f3c42..0214acbd6bff 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -367,7 +367,7 @@ static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int
/* relaying can only happen while the wq still exists */
u_sleep = sk_sleep(&u->sk);
if (u_sleep)
- wake_up_interruptible_poll(u_sleep, key);
+ wake_up_interruptible_poll(u_sleep, key_to_poll(key));
return 0;
}
@@ -638,8 +638,8 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
-static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
-static unsigned int unix_dgram_poll(struct file *, struct socket *,
+static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
+static __poll_t unix_dgram_poll(struct file *, struct socket *,
poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
@@ -2640,10 +2640,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return err;
}
-static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
+static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask;
+ __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
@@ -2675,11 +2675,12 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
return mask;
}
-static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
+static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk, *other;
- unsigned int mask, writable;
+ unsigned int writable;
+ __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
@@ -2869,7 +2870,6 @@ static int unix_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations unix_seq_fops = {
- .owner = THIS_MODULE,
.open = unix_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 5d28abf87fbf..9d95e773f4c8 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -850,11 +850,11 @@ static int vsock_shutdown(struct socket *sock, int mode)
return err;
}
-static unsigned int vsock_poll(struct file *file, struct socket *sock,
+static __poll_t vsock_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk;
- unsigned int mask;
+ __poll_t mask;
struct vsock_sock *vsk;
sk = sock->sk;
@@ -951,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
* POLLOUT|POLLWRNORM when peer is closed and nothing to read,
* but local send is not shutdown.
*/
- if (sk->sk_state == TCP_CLOSE) {
+ if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
mask |= POLLOUT | POLLWRNORM;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 5583df708b8c..a827547aa102 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -487,7 +487,7 @@ static void hvs_release(struct vsock_sock *vsk)
lock_sock(sk);
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
vsock_remove_sock(vsk);
release_sock(sk);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 391775e3575c..a7a73ffe675b 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -797,11 +797,13 @@ static void vmci_transport_handle_detach(struct sock *sk)
/* We should not be sending anymore since the peer won't be
* there to receive, but we can still receive if there is data
- * left in our consume queue.
+ * left in our consume queue. If the local endpoint is a host,
+ * we can't call vsock_stream_has_data, since that may block,
+ * but a host endpoint can't read data once the VM has
+ * detached, so there is no available data in that case.
*/
- if (vsock_stream_has_data(vsk) <= 0) {
- sk->sk_state = TCP_CLOSE;
-
+ if (vsk->local_addr.svm_cid == VMADDR_CID_HOST ||
+ vsock_stream_has_data(vsk) <= 0) {
if (sk->sk_state == TCP_SYN_SENT) {
/* The peer may detach from a queue pair while
* we are still in the connecting state, i.e.,
@@ -811,10 +813,12 @@ static void vmci_transport_handle_detach(struct sock *sk)
* event like a reset.
*/
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
return;
}
+ sk->sk_state = TCP_CLOSE;
}
sk->sk_state_change(sk);
}
@@ -2144,7 +2148,7 @@ module_exit(vmci_transport_exit);
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
-MODULE_VERSION("1.0.4.0-k");
+MODULE_VERSION("1.0.5.0-k");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("vmware_vsock");
MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index da91bb547db3..1abcc4fc4df1 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -20,6 +20,10 @@ config CFG80211
tristate "cfg80211 - wireless configuration API"
depends on RFKILL || !RFKILL
select FW_LOADER
+ # may need to update this when certificates are changed and are
+ # using a different algorithm, though right now they shouldn't
+ # (this is here rather than below to allow it to be a module)
+ select CRYPTO_SHA256 if CFG80211_USE_KERNEL_REGDB_KEYS
---help---
cfg80211 is the Linux wireless LAN (802.11) configuration API.
Enable this if you have a wireless device.
@@ -113,6 +117,9 @@ config CFG80211_EXTRA_REGDB_KEYDIR
certificates like in the kernel sources (net/wireless/certs/)
that shall be accepted for a signed regulatory database.
+ Note that you need to also select the correct CRYPTO_<hash> modules
+ for your certificates, and if cfg80211 is built-in they also must be.
+
config CFG80211_REG_CELLULAR_HINTS
bool "cfg80211 regulatory support for cellular base station hints"
depends on CFG80211_CERTIFICATION_ONUS
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 278d979c211a..1d84f91bbfb0 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -23,19 +23,36 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
cfg80211-y += extra-certs.o
endif
-$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
@$(kecho) " GEN $@"
- @echo '#include "reg.h"' > $@
- @echo 'const u8 shipped_regdb_certs[] = {' >> $@
- @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done
- @echo '};' >> $@
- @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@
+ @(echo '#include "reg.h"'; \
+ echo 'const u8 shipped_regdb_certs[] = {'; \
+ cat $^ ; \
+ echo '};'; \
+ echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
+ ) > $@
$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
$(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
@$(kecho) " GEN $@"
- @echo '#include "reg.h"' > $@
- @echo 'const u8 extra_regdb_certs[] = {' >> $@
- @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done
- @echo '};' >> $@
- @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@
+ @(set -e; \
+ allf=""; \
+ for f in $^ ; do \
+ # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
+ thisf=$$(od -An -v -tx1 < $$f | \
+ sed -e 's/ /\n/g' | \
+ sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
+ sed -e 's/^/0x/;s/$$/,/'); \
+ # file should not be empty - maybe command substitution failed? \
+ test ! -z "$$thisf";\
+ allf=$$allf$$thisf;\
+ done; \
+ ( \
+ echo '#include "reg.h"'; \
+ echo 'const u8 extra_regdb_certs[] = {'; \
+ echo "$$allf"; \
+ echo '};'; \
+ echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
+ ) > $@)
+
+clean-files += shipped-certs.c extra-certs.c
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644
index 000000000000..14ea66643ffa
--- /dev/null
+++ b/net/wireless/certs/sforshee.hex
@@ -0,0 +1,86 @@
+/* Seth Forshee's regdb certificate */
+0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
+0x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
+0xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
+0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
+0x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
+0x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
+0x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
+0x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
+0x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
+0x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
+0x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
+0x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
+0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
+0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
+0x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
+0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
+0x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
+0x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
+0x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
+0x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
+0x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
+0x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
+0x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
+0x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
+0x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
+0x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
+0x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
+0x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
+0x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
+0xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
+0x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
+0xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
+0x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
+0x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
+0x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
+0xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
+0xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
+0x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
+0x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
+0x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
+0x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
+0x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
+0x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
+0x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
+0x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
+0x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
+0x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
+0xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
+0xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
+0x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
+0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
+0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
+0x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
+0x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
+0x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
+0x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
+0x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
+0x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
+0x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
+0xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
+0x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
+0x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
+0x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
+0xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
+0x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
+0x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
+0x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
+0xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
+0xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
+0x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
+0xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
+0xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
+0x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
+0xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
+0xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
+0x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
+0xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
+0x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
+0xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
+0xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
+0x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
+0x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
+0x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
+0x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
+0x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644
index c6f8f9d6b988..000000000000
--- a/net/wireless/certs/sforshee.x509
+++ /dev/null
Binary files differ
diff --git a/net/wireless/core.c b/net/wireless/core.c
index fdde0d98fde1..a6f3cac8c640 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -439,6 +439,8 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
if (rv)
goto use_default_name;
} else {
+ int rv;
+
use_default_name:
/* NOTE: This is *probably* safe w/out holding rtnl because of
* the restrictions on phy names. Probably this call could
@@ -446,7 +448,11 @@ use_default_name:
* phyX. But, might should add some locking and check return
* value, and use a different name if this one exists?
*/
- dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx);
+ rv = dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx);
+ if (rv < 0) {
+ kfree(rdev);
+ return NULL;
+ }
}
INIT_LIST_HEAD(&rdev->wiphy.wdev_list);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index d2f7e8b8a097..eaff636169c2 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -507,8 +507,6 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
-#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
-
#ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS
#define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond)
#else
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 413d4f4e6334..a1d10993d08a 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -126,6 +126,11 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
wdev->ibss_fixed = params->channel_fixed;
wdev->ibss_dfs_possible = params->userspace_handles_dfs;
wdev->chandef = params->chandef;
+ if (connkeys) {
+ params->wep_keys = connkeys->params;
+ params->wep_tx_key = connkeys->def;
+ }
+
#ifdef CONFIG_CFG80211_WEXT
wdev->wext.ibss.chandef = params->chandef;
#endif
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index e7c64a8dce54..bbb9907bfa86 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -692,7 +692,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
return rdev_mgmt_tx(rdev, wdev, params, cookie);
}
-bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
+bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm,
const u8 *buf, size_t len, u32 flags)
{
struct wiphy *wiphy = wdev->wiphy;
@@ -708,7 +708,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE);
u16 stype;
- trace_cfg80211_rx_mgmt(wdev, freq, sig_mbm);
+ trace_cfg80211_rx_mgmt(wdev, freq, sig_dbm);
stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4;
if (!(stypes->rx & BIT(stype))) {
@@ -735,7 +735,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
/* Indicate the received Action frame to user space */
if (nl80211_send_mgmt(rdev, wdev, reg->nlportid,
- freq, sig_mbm,
+ freq, sig_dbm,
buf, len, flags, GFP_ATOMIC))
continue;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b1ac23ca20c8..ab0c687d0c44 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -734,11 +734,12 @@ struct key_parse {
bool def_uni, def_multi;
};
-static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
+static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key,
+ struct key_parse *k)
{
struct nlattr *tb[NL80211_KEY_MAX + 1];
int err = nla_parse_nested(tb, NL80211_KEY_MAX, key,
- nl80211_key_policy, NULL);
+ nl80211_key_policy, info->extack);
if (err)
return err;
@@ -771,7 +772,8 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
if (tb[NL80211_KEY_TYPE]) {
k->type = nla_get_u32(tb[NL80211_KEY_TYPE]);
if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES)
- return -EINVAL;
+ return genl_err_attr(info, -EINVAL,
+ tb[NL80211_KEY_TYPE]);
}
if (tb[NL80211_KEY_DEFAULT_TYPES]) {
@@ -779,7 +781,8 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
err = nla_parse_nested(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1,
tb[NL80211_KEY_DEFAULT_TYPES],
- nl80211_key_default_policy, NULL);
+ nl80211_key_default_policy,
+ info->extack);
if (err)
return err;
@@ -820,8 +823,10 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k)
if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
- if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES)
+ if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) {
+ GENL_SET_ERR_MSG(info, "key type out of range");
return -EINVAL;
+ }
}
if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) {
@@ -850,31 +855,42 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k)
k->type = -1;
if (info->attrs[NL80211_ATTR_KEY])
- err = nl80211_parse_key_new(info->attrs[NL80211_ATTR_KEY], k);
+ err = nl80211_parse_key_new(info, info->attrs[NL80211_ATTR_KEY], k);
else
err = nl80211_parse_key_old(info, k);
if (err)
return err;
- if (k->def && k->defmgmt)
+ if (k->def && k->defmgmt) {
+ GENL_SET_ERR_MSG(info, "key with def && defmgmt is invalid");
return -EINVAL;
+ }
if (k->defmgmt) {
- if (k->def_uni || !k->def_multi)
+ if (k->def_uni || !k->def_multi) {
+ GENL_SET_ERR_MSG(info, "defmgmt key must be mcast");
return -EINVAL;
+ }
}
if (k->idx != -1) {
if (k->defmgmt) {
- if (k->idx < 4 || k->idx > 5)
+ if (k->idx < 4 || k->idx > 5) {
+ GENL_SET_ERR_MSG(info,
+ "defmgmt key idx not 4 or 5");
return -EINVAL;
+ }
} else if (k->def) {
- if (k->idx < 0 || k->idx > 3)
+ if (k->idx < 0 || k->idx > 3) {
+ GENL_SET_ERR_MSG(info, "def key idx not 0-3");
return -EINVAL;
+ }
} else {
- if (k->idx < 0 || k->idx > 5)
+ if (k->idx < 0 || k->idx > 5) {
+ GENL_SET_ERR_MSG(info, "key idx not 0-5");
return -EINVAL;
+ }
}
}
@@ -883,8 +899,9 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k)
static struct cfg80211_cached_keys *
nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
- struct nlattr *keys, bool *no_ht)
+ struct genl_info *info, bool *no_ht)
{
+ struct nlattr *keys = info->attrs[NL80211_ATTR_KEYS];
struct key_parse parse;
struct nlattr *key;
struct cfg80211_cached_keys *result;
@@ -909,17 +926,22 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
memset(&parse, 0, sizeof(parse));
parse.idx = -1;
- err = nl80211_parse_key_new(key, &parse);
+ err = nl80211_parse_key_new(info, key, &parse);
if (err)
goto error;
err = -EINVAL;
if (!parse.p.key)
goto error;
- if (parse.idx < 0 || parse.idx > 3)
+ if (parse.idx < 0 || parse.idx > 3) {
+ GENL_SET_ERR_MSG(info, "key index out of range [0-3]");
goto error;
+ }
if (parse.def) {
- if (def)
+ if (def) {
+ GENL_SET_ERR_MSG(info,
+ "only one key can be default");
goto error;
+ }
def = 1;
result->def = parse.idx;
if (!parse.def_uni || !parse.def_multi)
@@ -932,6 +954,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
goto error;
if (parse.p.cipher != WLAN_CIPHER_SUITE_WEP40 &&
parse.p.cipher != WLAN_CIPHER_SUITE_WEP104) {
+ GENL_SET_ERR_MSG(info, "connect key must be WEP");
err = -EINVAL;
goto error;
}
@@ -947,6 +970,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
if (result->def < 0) {
err = -EINVAL;
+ GENL_SET_ERR_MSG(info, "need a default/TX key");
goto error;
}
@@ -2610,7 +2634,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
case NL80211_IFTYPE_AP:
if (wdev->ssid_len &&
nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
- goto nla_put_failure;
+ goto nla_put_failure_locked;
break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
@@ -2618,12 +2642,13 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
const u8 *ssid_ie;
if (!wdev->current_bss)
break;
+ rcu_read_lock();
ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub,
WLAN_EID_SSID);
- if (!ssid_ie)
- break;
- if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
- goto nla_put_failure;
+ if (ssid_ie &&
+ nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
+ goto nla_put_failure_rcu_locked;
+ rcu_read_unlock();
break;
}
default:
@@ -2635,6 +2660,10 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
genlmsg_end(msg, hdr);
return 0;
+ nla_put_failure_rcu_locked:
+ rcu_read_unlock();
+ nla_put_failure_locked:
+ wdev_unlock(wdev);
nla_put_failure:
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
@@ -7815,6 +7844,11 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
intbss->ts_boottime, NL80211_BSS_PAD))
goto nla_put_failure;
+ if (!nl80211_put_signal(msg, intbss->pub.chains,
+ intbss->pub.chain_signal,
+ NL80211_BSS_CHAIN_SIGNAL))
+ goto nla_put_failure;
+
switch (rdev->wiphy.signal_type) {
case CFG80211_SIGNAL_TYPE_MBM:
if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal))
@@ -8611,9 +8645,7 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) {
bool no_ht = false;
- connkeys = nl80211_parse_connkeys(rdev,
- info->attrs[NL80211_ATTR_KEYS],
- &no_ht);
+ connkeys = nl80211_parse_connkeys(rdev, info, &no_ht);
if (IS_ERR(connkeys))
return PTR_ERR(connkeys);
@@ -9017,8 +9049,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
}
if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) {
- connkeys = nl80211_parse_connkeys(rdev,
- info->attrs[NL80211_ATTR_KEYS], NULL);
+ connkeys = nl80211_parse_connkeys(rdev, info, NULL);
if (IS_ERR(connkeys))
return PTR_ERR(connkeys);
}
@@ -9804,7 +9835,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
*/
if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss &&
rdev->ops->get_station) {
- struct station_info sinfo;
+ struct station_info sinfo = {};
u8 *mac_addr;
mac_addr = wdev->current_bss->pub.bssid;
@@ -11359,7 +11390,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
break;
case NL80211_NAN_FUNC_FOLLOW_UP:
if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
- !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) {
+ !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] ||
+ !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) {
err = -EINVAL;
goto out;
}
@@ -13942,7 +13974,7 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
- (from_ap && reason &&
+ (reason &&
nla_put_u16(msg, NL80211_ATTR_REASON_CODE, reason)) ||
(from_ap &&
nla_put_flag(msg, NL80211_ATTR_DISCONNECTED_BY_AP)) ||
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 78e71b0390be..7b42f0bacfd8 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1769,8 +1769,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx,
if (wiphy->regulatory_flags & REGULATORY_DISABLE_BEACON_HINTS)
return;
- chan_before.center_freq = chan->center_freq;
- chan_before.flags = chan->flags;
+ chan_before = *chan;
if (chan->flags & IEEE80211_CHAN_NO_IR) {
chan->flags &= ~IEEE80211_CHAN_NO_IR;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index f6c5fe482506..d36c3eb7b931 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -981,6 +981,9 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
found->ts = tmp->ts;
found->ts_boottime = tmp->ts_boottime;
found->parent_tsf = tmp->parent_tsf;
+ found->pub.chains = tmp->pub.chains;
+ memcpy(found->pub.chain_signal, tmp->pub.chain_signal,
+ IEEE80211_MAX_CHAINS);
ether_addr_copy(found->parent_bssid, tmp->parent_bssid);
} else {
struct cfg80211_internal_bss *new;
@@ -1233,6 +1236,8 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
tmp.ts_boottime = data->boottime_ns;
tmp.parent_tsf = data->parent_tsf;
+ tmp.pub.chains = data->chains;
+ memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS);
ether_addr_copy(tmp.parent_bssid, data->parent_bssid);
signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index f3353fe5b35b..bcfedd39e7a3 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2544,20 +2544,20 @@ DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_del_sta,
);
TRACE_EVENT(cfg80211_rx_mgmt,
- TP_PROTO(struct wireless_dev *wdev, int freq, int sig_mbm),
- TP_ARGS(wdev, freq, sig_mbm),
+ TP_PROTO(struct wireless_dev *wdev, int freq, int sig_dbm),
+ TP_ARGS(wdev, freq, sig_dbm),
TP_STRUCT__entry(
WDEV_ENTRY
__field(int, freq)
- __field(int, sig_mbm)
+ __field(int, sig_dbm)
),
TP_fast_assign(
WDEV_ASSIGN;
__entry->freq = freq;
- __entry->sig_mbm = sig_mbm;
+ __entry->sig_dbm = sig_dbm;
),
- TP_printk(WDEV_PR_FMT ", freq: %d, sig mbm: %d",
- WDEV_PR_ARG, __entry->freq, __entry->sig_mbm)
+ TP_printk(WDEV_PR_FMT ", freq: %d, sig dbm: %d",
+ WDEV_PR_ARG, __entry->freq, __entry->sig_dbm)
);
TRACE_EVENT(cfg80211_mgmt_tx_status,
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 7ca04a7de85a..05186a47878f 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1254,8 +1254,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
- /* we are under RTNL - globally locked - so can use a static struct */
- static struct station_info sinfo;
+ struct station_info sinfo = {};
u8 addr[ETH_ALEN];
int err;
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 6cdb054484d6..9efbfc753347 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -1035,18 +1035,23 @@ static int ioctl_standard_call(struct net_device * dev,
}
-int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
- void __user *arg)
+int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
struct iw_request_info info = { .cmd = cmd, .flags = 0 };
+ struct iwreq iwr;
int ret;
- ret = wext_ioctl_dispatch(net, iwr, cmd, &info,
+ if (copy_from_user(&iwr, arg, sizeof(iwr)))
+ return -EFAULT;
+
+ iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
+
+ ret = wext_ioctl_dispatch(net, &iwr, cmd, &info,
ioctl_standard_call,
ioctl_private_call);
if (ret >= 0 &&
IW_IS_GET(cmd) &&
- copy_to_user(arg, iwr, sizeof(struct iwreq)))
+ copy_to_user(arg, &iwr, sizeof(struct iwreq)))
return -EFAULT;
return ret;
diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c
index e98a01c1034f..5511f989ef47 100644
--- a/net/wireless/wext-proc.c
+++ b/net/wireless/wext-proc.c
@@ -133,7 +133,6 @@ static int seq_open_wireless(struct inode *inode, struct file *file)
}
static const struct file_operations wireless_seq_fops = {
- .owner = THIS_MODULE,
.open = seq_open_wireless,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 30e5746085b8..8e70291e586a 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -23,32 +23,114 @@
#include <linux/notifier.h>
#ifdef CONFIG_XFRM_OFFLOAD
-int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
+struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
{
int err;
+ unsigned long flags;
struct xfrm_state *x;
+ struct sk_buff *skb2;
+ struct softnet_data *sd;
+ netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
- if (skb_is_gso(skb))
- return 0;
+ if (!xo)
+ return skb;
- if (xo) {
- x = skb->sp->xvec[skb->sp->len - 1];
- if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
- return 0;
+ if (!(features & NETIF_F_HW_ESP))
+ esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+
+ x = skb->sp->xvec[skb->sp->len - 1];
+ if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ return skb;
+ local_irq_save(flags);
+ sd = this_cpu_ptr(&softnet_data);
+ err = !skb_queue_empty(&sd->xfrm_backlog);
+ local_irq_restore(flags);
+
+ if (err) {
+ *again = true;
+ return skb;
+ }
+
+ if (skb_is_gso(skb)) {
+ struct net_device *dev = skb->dev;
+
+ if (unlikely(!x->xso.offload_handle || (x->xso.dev != dev))) {
+ struct sk_buff *segs;
+
+ /* Packet got rerouted, fixup features and segment it. */
+ esp_features = esp_features & ~(NETIF_F_HW_ESP
+ | NETIF_F_GSO_ESP);
+
+ segs = skb_gso_segment(skb, esp_features);
+ if (IS_ERR(segs)) {
+ kfree_skb(skb);
+ atomic_long_inc(&dev->tx_dropped);
+ return NULL;
+ } else {
+ consume_skb(skb);
+ skb = segs;
+ }
+ }
+ }
+
+ if (!skb->next) {
x->outer_mode->xmit(x, skb);
- err = x->type_offload->xmit(x, skb, features);
+ xo->flags |= XFRM_DEV_RESUME;
+
+ err = x->type_offload->xmit(x, skb, esp_features);
if (err) {
+ if (err == -EINPROGRESS)
+ return NULL;
+
XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
- return err;
+ kfree_skb(skb);
+ return NULL;
}
skb_push(skb, skb->data - skb_mac_header(skb));
+
+ return skb;
}
- return 0;
+ skb2 = skb;
+
+ do {
+ struct sk_buff *nskb = skb2->next;
+ skb2->next = NULL;
+
+ xo = xfrm_offload(skb2);
+ xo->flags |= XFRM_DEV_RESUME;
+
+ x->outer_mode->xmit(x, skb2);
+
+ err = x->type_offload->xmit(x, skb2, esp_features);
+ if (!err) {
+ skb2->next = nskb;
+ } else if (err != -EINPROGRESS) {
+ XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+ skb2->next = nskb;
+ kfree_skb_list(skb2);
+ return NULL;
+ } else {
+ if (skb == skb2)
+ skb = nskb;
+
+ if (!skb)
+ return NULL;
+
+ goto skip_push;
+ }
+
+ skb_push(skb2, skb2->data - skb_mac_header(skb2));
+
+skip_push:
+ skb2 = nskb;
+ } while (skb2);
+
+ return skb;
}
EXPORT_SYMBOL_GPL(validate_xmit_xfrm);
@@ -65,9 +147,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
if (!x->type_offload)
return -EINVAL;
- /* We don't yet support UDP encapsulation, TFC padding and ESN. */
- if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN))
- return 0;
+ /* We don't yet support UDP encapsulation and TFC padding. */
+ if (x->encap || x->tfcpad)
+ return -EINVAL;
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
@@ -96,12 +178,20 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return 0;
}
+ if (x->props.flags & XFRM_STATE_ESN &&
+ !dev->xfrmdev_ops->xdo_dev_state_advance_esn) {
+ xso->dev = NULL;
+ dev_put(dev);
+ return -EINVAL;
+ }
+
xso->dev = dev;
xso->num_exthdrs = 1;
xso->flags = xuo->flags;
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
+ xso->dev = NULL;
dev_put(dev);
return err;
}
@@ -120,8 +210,8 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
if (!x->type_offload || x->encap)
return false;
- if ((x->xso.offload_handle && (dev == dst->path->dev)) &&
- !dst->child->xfrm && x->type->get_mtu) {
+ if ((!dev || (x->xso.offload_handle && (dev == xfrm_dst_path(dst)->dev))) &&
+ (!xdst->child->xfrm && x->type->get_mtu)) {
mtu = x->type->get_mtu(x, xdst->child_mtu_cached);
if (skb->len <= mtu)
@@ -140,19 +230,82 @@ ok:
return true;
}
EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
+
+void xfrm_dev_resume(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ int ret = NETDEV_TX_BUSY;
+ struct netdev_queue *txq;
+ struct softnet_data *sd;
+ unsigned long flags;
+
+ rcu_read_lock();
+ txq = netdev_pick_tx(dev, skb, NULL);
+
+ HARD_TX_LOCK(dev, txq, smp_processor_id());
+ if (!netif_xmit_frozen_or_stopped(txq))
+ skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+ HARD_TX_UNLOCK(dev, txq);
+
+ if (!dev_xmit_complete(ret)) {
+ local_irq_save(flags);
+ sd = this_cpu_ptr(&softnet_data);
+ skb_queue_tail(&sd->xfrm_backlog, skb);
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(xfrm_dev_resume);
+
+void xfrm_dev_backlog(struct softnet_data *sd)
+{
+ struct sk_buff_head *xfrm_backlog = &sd->xfrm_backlog;
+ struct sk_buff_head list;
+ struct sk_buff *skb;
+
+ if (skb_queue_empty(xfrm_backlog))
+ return;
+
+ __skb_queue_head_init(&list);
+
+ spin_lock(&xfrm_backlog->lock);
+ skb_queue_splice_init(xfrm_backlog, &list);
+ spin_unlock(&xfrm_backlog->lock);
+
+ while (!skb_queue_empty(&list)) {
+ skb = __skb_dequeue(&list);
+ xfrm_dev_resume(skb);
+ }
+
+}
#endif
-static int xfrm_dev_register(struct net_device *dev)
+static int xfrm_api_check(struct net_device *dev)
{
- if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
- return NOTIFY_BAD;
+#ifdef CONFIG_XFRM_OFFLOAD
if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
!(dev->features & NETIF_F_HW_ESP))
return NOTIFY_BAD;
+ if ((dev->features & NETIF_F_HW_ESP) &&
+ (!(dev->xfrmdev_ops &&
+ dev->xfrmdev_ops->xdo_dev_state_add &&
+ dev->xfrmdev_ops->xdo_dev_state_delete)))
+ return NOTIFY_BAD;
+#else
+ if (dev->features & (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM))
+ return NOTIFY_BAD;
+#endif
+
return NOTIFY_DONE;
}
+static int xfrm_dev_register(struct net_device *dev)
+{
+ return xfrm_api_check(dev);
+}
+
static int xfrm_dev_unregister(struct net_device *dev)
{
xfrm_policy_cache_flush();
@@ -161,16 +314,7 @@ static int xfrm_dev_unregister(struct net_device *dev)
static int xfrm_dev_feat_change(struct net_device *dev)
{
- if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
- return NOTIFY_BAD;
- else if (!(dev->features & NETIF_F_HW_ESP))
- dev->xfrmdev_ops = NULL;
-
- if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
- !(dev->features & NETIF_F_HW_ESP))
- return NOTIFY_BAD;
-
- return NOTIFY_DONE;
+ return xfrm_api_check(dev);
}
static int xfrm_dev_down(struct net_device *dev)
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 347ab31574d5..1472c0857975 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -8,15 +8,29 @@
*
*/
+#include <linux/bottom_half.h>
+#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/netdevice.h>
+#include <linux/percpu.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>
+struct xfrm_trans_tasklet {
+ struct tasklet_struct tasklet;
+ struct sk_buff_head queue;
+};
+
+struct xfrm_trans_cb {
+ int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
+};
+
+#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
+
static struct kmem_cache *secpath_cachep __read_mostly;
static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
@@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
static struct gro_cells gro_cells;
static struct net_device xfrm_napi_dev;
+static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
+
int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
{
int err = 0;
@@ -207,7 +223,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
xfrm_address_t *daddr;
struct xfrm_mode *inner_mode;
u32 mark = skb->mark;
- unsigned int family;
+ unsigned int family = AF_UNSPEC;
int decaps = 0;
int async = 0;
bool xfrm_gro = false;
@@ -216,6 +232,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
if (encap_type < 0) {
x = xfrm_input_state(skb);
+
+ if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+ if (x->km.state == XFRM_STATE_ACQ)
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
+ else
+ XFRM_INC_STATS(net,
+ LINUX_MIB_XFRMINSTATEINVALID);
+ goto drop;
+ }
+
family = x->outer_mode->afinfo->family;
/* An encap_type of -1 indicates async resumption. */
@@ -231,7 +257,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
if (xo && (xo->flags & CRYPTO_DONE)) {
crypto_done = true;
- x = xfrm_input_state(skb);
family = XFRM_SPI_SKB_CB(skb)->family;
if (!(xo->status & CRYPTO_SUCCESS)) {
@@ -467,9 +492,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
}
EXPORT_SYMBOL(xfrm_input_resume);
+static void xfrm_trans_reinject(unsigned long data)
+{
+ struct xfrm_trans_tasklet *trans = (void *)data;
+ struct sk_buff_head queue;
+ struct sk_buff *skb;
+
+ __skb_queue_head_init(&queue);
+ skb_queue_splice_init(&trans->queue, &queue);
+
+ while ((skb = __skb_dequeue(&queue)))
+ XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
+}
+
+int xfrm_trans_queue(struct sk_buff *skb,
+ int (*finish)(struct net *, struct sock *,
+ struct sk_buff *))
+{
+ struct xfrm_trans_tasklet *trans;
+
+ trans = this_cpu_ptr(&xfrm_trans_tasklet);
+
+ if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+ return -ENOBUFS;
+
+ XFRM_TRANS_SKB_CB(skb)->finish = finish;
+ __skb_queue_tail(&trans->queue, skb);
+ tasklet_schedule(&trans->tasklet);
+ return 0;
+}
+EXPORT_SYMBOL(xfrm_trans_queue);
+
void __init xfrm_input_init(void)
{
int err;
+ int i;
init_dummy_netdev(&xfrm_napi_dev);
err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
@@ -480,4 +537,13 @@ void __init xfrm_input_init(void)
sizeof(struct sec_path),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
+
+ for_each_possible_cpu(i) {
+ struct xfrm_trans_tasklet *trans;
+
+ trans = &per_cpu(xfrm_trans_tasklet, i);
+ __skb_queue_head_init(&trans->queue);
+ tasklet_init(&trans->tasklet, xfrm_trans_reinject,
+ (unsigned long)trans);
+ }
}
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 73ad8c8ef344..23468672a767 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -44,7 +44,7 @@ static int xfrm_skb_check_space(struct sk_buff *skb)
static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
{
- struct dst_entry *child = dst_clone(skb_dst(skb)->child);
+ struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb)));
skb_dst_drop(skb);
return child;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9542975eb2f9..7a23078132cf 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -54,7 +54,7 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
static struct kmem_cache *xfrm_dst_cache __read_mostly;
static __read_mostly seqcount_t xfrm_policy_hash_generation;
-static void xfrm_init_pmtu(struct dst_entry *dst);
+static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
static void xfrm_policy_queue_process(struct timer_list *t);
@@ -609,7 +609,8 @@ static void xfrm_hash_rebuild(struct work_struct *work)
/* re-insert all policies by order of creation */
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
- if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
+ if (policy->walk.dead ||
+ xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
/* skip socket policies */
continue;
}
@@ -974,8 +975,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
}
if (!cnt)
err = -ESRCH;
- else
- xfrm_policy_cache_flush();
out:
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return err;
@@ -1168,9 +1167,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
again:
pol = rcu_dereference(sk->sk_policy[dir]);
if (pol != NULL) {
- bool match = xfrm_selector_match(&pol->selector, fl, family);
+ bool match;
int err = 0;
+ if (pol->family != family) {
+ pol = NULL;
+ goto out;
+ }
+
+ match = xfrm_selector_match(&pol->selector, fl, family);
if (match) {
if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
pol = NULL;
@@ -1251,7 +1256,7 @@ EXPORT_SYMBOL(xfrm_policy_delete);
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
- struct net *net = xp_net(pol);
+ struct net *net = sock_net(sk);
struct xfrm_policy *old_pol;
#ifdef CONFIG_XFRM_SUB_POLICY
@@ -1538,7 +1543,9 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
*/
static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
- struct xfrm_state **xfrm, int nx,
+ struct xfrm_state **xfrm,
+ struct xfrm_dst **bundle,
+ int nx,
const struct flowi *fl,
struct dst_entry *dst)
{
@@ -1546,8 +1553,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
unsigned long now = jiffies;
struct net_device *dev;
struct xfrm_mode *inner_mode;
- struct dst_entry *dst_prev = NULL;
- struct dst_entry *dst0 = NULL;
+ struct xfrm_dst *xdst_prev = NULL;
+ struct xfrm_dst *xdst0 = NULL;
int i = 0;
int err;
int header_len = 0;
@@ -1573,13 +1580,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
goto put_states;
}
- if (!dst_prev)
- dst0 = dst1;
+ bundle[i] = xdst;
+ if (!xdst_prev)
+ xdst0 = xdst;
else
/* Ref count is taken during xfrm_alloc_dst()
* No need to do dst_clone() on dst1
*/
- dst_prev->child = dst1;
+ xfrm_dst_set_child(xdst_prev, &xdst->u.dst);
if (xfrm[i]->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(xfrm[i],
@@ -1616,8 +1624,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
dst1->input = dst_discard;
dst1->output = inner_mode->afinfo->output;
- dst1->next = dst_prev;
- dst_prev = dst1;
+ xdst_prev = xdst;
header_len += xfrm[i]->props.header_len;
if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
@@ -1625,40 +1632,39 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
trailer_len += xfrm[i]->props.trailer_len;
}
- dst_prev->child = dst;
- dst0->path = dst;
+ xfrm_dst_set_child(xdst_prev, dst);
+ xdst0->path = dst;
err = -ENODEV;
dev = dst->dev;
if (!dev)
goto free_dst;
- xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
- xfrm_init_pmtu(dst_prev);
+ xfrm_init_path(xdst0, dst, nfheader_len);
+ xfrm_init_pmtu(bundle, nx);
- for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
- struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
-
- err = xfrm_fill_dst(xdst, dev, fl);
+ for (xdst_prev = xdst0; xdst_prev != (struct xfrm_dst *)dst;
+ xdst_prev = (struct xfrm_dst *) xfrm_dst_child(&xdst_prev->u.dst)) {
+ err = xfrm_fill_dst(xdst_prev, dev, fl);
if (err)
goto free_dst;
- dst_prev->header_len = header_len;
- dst_prev->trailer_len = trailer_len;
- header_len -= xdst->u.dst.xfrm->props.header_len;
- trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
+ xdst_prev->u.dst.header_len = header_len;
+ xdst_prev->u.dst.trailer_len = trailer_len;
+ header_len -= xdst_prev->u.dst.xfrm->props.header_len;
+ trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len;
}
out:
- return dst0;
+ return &xdst0->u.dst;
put_states:
for (; i < nx; i++)
xfrm_state_put(xfrm[i]);
free_dst:
- if (dst0)
- dst_release_immediate(dst0);
- dst0 = ERR_PTR(err);
+ if (xdst0)
+ dst_release_immediate(&xdst0->u.dst);
+ xdst0 = ERR_PTR(err);
goto out;
}
@@ -1737,6 +1743,8 @@ void xfrm_policy_cache_flush(void)
bool found = 0;
int cpu;
+ might_sleep();
+
local_bh_disable();
rcu_read_lock();
for_each_possible_cpu(cpu) {
@@ -1800,7 +1808,7 @@ static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
for (i = 0; i < num; i++) {
if (!dst || dst->xfrm != xfrm[i])
return false;
- dst = dst->child;
+ dst = xfrm_dst_child(dst);
}
return xfrm_bundle_ok(xdst);
@@ -1813,6 +1821,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
{
struct net *net = xp_net(pols[0]);
struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+ struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
struct xfrm_dst *xdst, *old;
struct dst_entry *dst;
int err;
@@ -1833,6 +1842,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
sizeof(struct xfrm_policy *) * num_pols) == 0 &&
xfrm_xdst_can_reuse(xdst, xfrm, err)) {
dst_hold(&xdst->u.dst);
+ xfrm_pols_put(pols, num_pols);
while (err > 0)
xfrm_state_put(xfrm[--err]);
return xdst;
@@ -1840,7 +1850,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
old = xdst;
- dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
+ dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
if (IS_ERR(dst)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
return ERR_CAST(dst);
@@ -1880,8 +1890,8 @@ static void xfrm_policy_queue_process(struct timer_list *t)
xfrm_decode_session(skb, &fl, dst->ops->family);
spin_unlock(&pq->hold_queue.lock);
- dst_hold(dst->path);
- dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
+ dst_hold(xfrm_dst_path(dst));
+ dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0);
if (IS_ERR(dst))
goto purge_queue;
@@ -1910,8 +1920,8 @@ static void xfrm_policy_queue_process(struct timer_list *t)
skb = __skb_dequeue(&list);
xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
- dst_hold(skb_dst(skb)->path);
- dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
+ dst_hold(xfrm_dst_path(skb_dst(skb)));
+ dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0);
if (IS_ERR(dst)) {
kfree_skb(skb);
continue;
@@ -2012,8 +2022,8 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
dst1->output = xdst_queue_output;
dst_hold(dst);
- dst1->child = dst;
- dst1->path = dst;
+ xfrm_dst_set_child(xdst, dst);
+ xdst->path = dst;
xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);
@@ -2055,8 +2065,11 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
if (num_xfrms <= 0)
goto make_dummy_bundle;
+ local_bh_disable();
xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
- xflo->dst_orig);
+ xflo->dst_orig);
+ local_bh_enable();
+
if (IS_ERR(xdst)) {
err = PTR_ERR(xdst);
if (err != -EAGAIN)
@@ -2143,9 +2156,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
goto no_transform;
}
+ local_bh_disable();
xdst = xfrm_resolve_and_create_bundle(
pols, num_pols, fl,
family, dst_orig);
+ local_bh_enable();
+
if (IS_ERR(xdst)) {
xfrm_pols_put(pols, num_pols);
err = PTR_ERR(xdst);
@@ -2576,7 +2592,7 @@ static int stale_bundle(struct dst_entry *dst)
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
- while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
+ while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
dst->dev = dev_net(dev)->loopback_dev;
dev_hold(dst->dev);
dev_put(dev);
@@ -2600,13 +2616,15 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
return dst;
}
-static void xfrm_init_pmtu(struct dst_entry *dst)
+static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
{
- do {
- struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ while (nr--) {
+ struct xfrm_dst *xdst = bundle[nr];
u32 pmtu, route_mtu_cached;
+ struct dst_entry *dst;
- pmtu = dst_mtu(dst->child);
+ dst = &xdst->u.dst;
+ pmtu = dst_mtu(xfrm_dst_child(dst));
xdst->child_mtu_cached = pmtu;
pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
@@ -2618,7 +2636,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
pmtu = route_mtu_cached;
dst_metric_set(dst, RTAX_MTU, pmtu);
- } while ((dst = dst->next));
+ }
}
/* Check that the bundle accepts the flow and its components are
@@ -2627,19 +2645,20 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
static int xfrm_bundle_ok(struct xfrm_dst *first)
{
+ struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
struct dst_entry *dst = &first->u.dst;
- struct xfrm_dst *last;
+ struct xfrm_dst *xdst;
+ int start_from, nr;
u32 mtu;
- if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
+ if (!dst_check(xfrm_dst_path(dst), ((struct xfrm_dst *)dst)->path_cookie) ||
(dst->dev && !netif_running(dst->dev)))
return 0;
if (dst->flags & DST_XFRM_QUEUE)
return 1;
- last = NULL;
-
+ start_from = nr = 0;
do {
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@ -2651,9 +2670,11 @@ static int xfrm_bundle_ok(struct xfrm_dst *first)
xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
return 0;
- mtu = dst_mtu(dst->child);
+ bundle[nr++] = xdst;
+
+ mtu = dst_mtu(xfrm_dst_child(dst));
if (xdst->child_mtu_cached != mtu) {
- last = xdst;
+ start_from = nr;
xdst->child_mtu_cached = mtu;
}
@@ -2661,30 +2682,30 @@ static int xfrm_bundle_ok(struct xfrm_dst *first)
return 0;
mtu = dst_mtu(xdst->route);
if (xdst->route_mtu_cached != mtu) {
- last = xdst;
+ start_from = nr;
xdst->route_mtu_cached = mtu;
}
- dst = dst->child;
+ dst = xfrm_dst_child(dst);
} while (dst->xfrm);
- if (likely(!last))
+ if (likely(!start_from))
return 1;
- mtu = last->child_mtu_cached;
- for (;;) {
- dst = &last->u.dst;
+ xdst = bundle[start_from - 1];
+ mtu = xdst->child_mtu_cached;
+ while (start_from--) {
+ dst = &xdst->u.dst;
mtu = xfrm_state_mtu(dst->xfrm, mtu);
- if (mtu > last->route_mtu_cached)
- mtu = last->route_mtu_cached;
+ if (mtu > xdst->route_mtu_cached)
+ mtu = xdst->route_mtu_cached;
dst_metric_set(dst, RTAX_MTU, mtu);
-
- if (last == first)
+ if (!start_from)
break;
- last = (struct xfrm_dst *)last->u.dst.next;
- last->child_mtu_cached = mtu;
+ xdst = bundle[start_from - 1];
+ xdst->child_mtu_cached = mtu;
}
return 1;
@@ -2692,22 +2713,20 @@ static int xfrm_bundle_ok(struct xfrm_dst *first)
static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
{
- return dst_metric_advmss(dst->path);
+ return dst_metric_advmss(xfrm_dst_path(dst));
}
static unsigned int xfrm_mtu(const struct dst_entry *dst)
{
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
- return mtu ? : dst_mtu(dst->path);
+ return mtu ? : dst_mtu(xfrm_dst_path(dst));
}
static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
const void *daddr)
{
- const struct dst_entry *path = dst->path;
-
- for (; dst != path; dst = dst->child) {
+ while (dst->xfrm) {
const struct xfrm_state *xfrm = dst->xfrm;
if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
@@ -2716,6 +2735,8 @@ static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
daddr = xfrm->coaddr;
else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
daddr = &xfrm->id.daddr;
+
+ dst = xfrm_dst_child(dst);
}
return daddr;
}
@@ -2724,7 +2745,7 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr)
{
- const struct dst_entry *path = dst->path;
+ const struct dst_entry *path = xfrm_dst_path(dst);
if (!skb)
daddr = xfrm_get_dst_nexthop(dst, daddr);
@@ -2733,7 +2754,7 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
- const struct dst_entry *path = dst->path;
+ const struct dst_entry *path = xfrm_dst_path(dst);
daddr = xfrm_get_dst_nexthop(dst, daddr);
path->ops->confirm_neigh(path, daddr);
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index ba2b539879bc..6d5f85f4e672 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -71,7 +71,6 @@ static int xfrm_statistics_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations xfrm_statistics_seq_fops = {
- .owner = THIS_MODULE,
.open = xfrm_statistics_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 8b23c5bcf8e8..1d38c6acf8af 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -551,6 +551,8 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
bitnr = replay_esn->replay_window - (diff - pos);
}
+ xfrm_dev_state_advance_esn(x);
+
nr = bitnr >> 5;
bitnr = bitnr & 0x1F;
replay_esn->bmp[nr] |= (1U << bitnr);
@@ -666,7 +668,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
if (unlikely(oseq < replay_esn->oseq)) {
XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi;
xo->seq.hi = oseq_hi;
-
+ replay_esn->oseq_hi = oseq_hi;
if (replay_esn->oseq_hi == 0) {
replay_esn->oseq--;
replay_esn->oseq_hi--;
@@ -678,7 +680,6 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
}
replay_esn->oseq = oseq;
- replay_esn->oseq_hi = oseq_hi;
if (xfrm_aevent_is_on(net))
x->repl->notify(x, XFRM_REPLAY_UPDATE);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 065d89606888..54e21f19d722 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -313,13 +313,14 @@ retry:
if ((type && !try_module_get(type->owner)))
type = NULL;
+ rcu_read_unlock();
+
if (!type && try_load) {
request_module("xfrm-offload-%d-%d", family, proto);
- try_load = 0;
+ try_load = false;
goto retry;
}
- rcu_read_unlock();
return type;
}
@@ -1343,6 +1344,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
if (orig->aead) {
x->aead = xfrm_algo_aead_clone(orig->aead);
+ x->geniv = orig->geniv;
if (!x->aead)
goto error;
}
@@ -1533,8 +1535,12 @@ out:
err = -EINVAL;
spin_lock_bh(&x1->lock);
if (likely(x1->km.state == XFRM_STATE_VALID)) {
- if (x->encap && x1->encap)
+ if (x->encap && x1->encap &&
+ x->encap->encap_type == x1->encap->encap_type)
memcpy(x1->encap, x->encap, sizeof(*x1->encap));
+ else if (x->encap || x1->encap)
+ goto fail;
+
if (x->coaddr && x1->coaddr) {
memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
}
@@ -1551,6 +1557,8 @@ out:
x->km.state = XFRM_STATE_DEAD;
__xfrm_state_put(x);
}
+
+fail:
spin_unlock_bh(&x1->lock);
xfrm_state_put(x1);
@@ -2048,6 +2056,13 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
struct xfrm_mgr *km;
struct xfrm_policy *pol = NULL;
+ if (!optval && !optlen) {
+ xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
+ xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
+ __sk_dst_reset(sk);
+ return 0;
+ }
+
if (optlen <= 0 || optlen > PAGE_SIZE)
return -EMSGSIZE;
@@ -2264,8 +2279,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
goto error;
}
- x->km.state = XFRM_STATE_VALID;
-
error:
return err;
}
@@ -2274,7 +2287,13 @@ EXPORT_SYMBOL(__xfrm_init_state);
int xfrm_init_state(struct xfrm_state *x)
{
- return __xfrm_init_state(x, true, false);
+ int err;
+
+ err = __xfrm_init_state(x, true, false);
+ if (!err)
+ x->km.state = XFRM_STATE_VALID;
+
+ return err;
}
EXPORT_SYMBOL(xfrm_init_state);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 983b0233767b..7f52b8eb177d 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -598,13 +598,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
goto error;
}
- if (attrs[XFRMA_OFFLOAD_DEV]) {
- err = xfrm_dev_state_add(net, x,
- nla_data(attrs[XFRMA_OFFLOAD_DEV]));
- if (err)
- goto error;
- }
-
if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
attrs[XFRMA_REPLAY_ESN_VAL])))
goto error;
@@ -620,6 +613,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
/* override default values from above */
xfrm_update_ae_params(x, attrs, 0);
+ /* configure the hardware if offload is requested */
+ if (attrs[XFRMA_OFFLOAD_DEV]) {
+ err = xfrm_dev_state_add(net, x,
+ nla_data(attrs[XFRMA_OFFLOAD_DEV]));
+ if (err)
+ goto error;
+ }
+
return x;
error:
@@ -662,6 +663,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
+ if (x->km.state == XFRM_STATE_VOID)
+ x->km.state = XFRM_STATE_VALID;
+
c.seq = nlh->nlmsg_seq;
c.portid = nlh->nlmsg_pid;
c.event = nlh->nlmsg_type;
@@ -1419,11 +1423,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
{
+ u16 prev_family;
int i;
if (nr > XFRM_MAX_DEPTH)
return -EINVAL;
+ prev_family = family;
+
for (i = 0; i < nr; i++) {
/* We never validated the ut->family value, so many
* applications simply leave it at zero. The check was
@@ -1435,6 +1442,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
if (!ut[i].family)
ut[i].family = family;
+ if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
+ (ut[i].family != prev_family))
+ return -EINVAL;
+
+ prev_family = ut[i].family;
+
switch (ut[i].family) {
case AF_INET:
break;
@@ -1445,6 +1458,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
default:
return -EINVAL;
}
+
+ switch (ut[i].id.proto) {
+ case IPPROTO_AH:
+ case IPPROTO_ESP:
+ case IPPROTO_COMP:
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+#endif
+ case IPSEC_PROTO_ANY:
+ break;
+ default:
+ return -EINVAL;
+ }
+
}
return 0;
@@ -2470,7 +2498,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_PROTO] = { .type = NLA_U8 },
[XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
[XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) },
- [XFRMA_OUTPUT_MARK] = { .len = NLA_U32 },
+ [XFRMA_OUTPUT_MARK] = { .type = NLA_U32 },
};
static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {